Logo ROOT   6.10/09
Reference Guide
MethodSVM.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Marcin Wolter, Andrzej Zemla
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodSVM *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Marcin Wolter <Marcin.Wolter@cern.ch> - IFJ PAN, Krakow, Poland *
15  * Andrzej Zemla <azemla@cern.ch> - IFJ PAN, Krakow, Poland *
16  * (IFJ PAN: Henryk Niewodniczanski Inst. Nucl. Physics, Krakow, Poland) *
17  * *
18  * Introduction of regression by: *
19  * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Krakow, Poland *
20  * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Krakow, Poland *
21  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Krakow, Poland *
22  * *
23  * Introduction of kernel parameter optimisation *
24  * and additional kernel functions by: *
25  * Adrian Bevan <adrian.bevan@cern.ch> - Queen Mary *
26  * University of London, UK *
27  * Tom Stevenson <thomas.james.stevenson@cern.ch> - Queen Mary *
28  * University of London, UK *
29  * *
30  * Copyright (c) 2005: *
31  * CERN, Switzerland *
32  * MPI-K Heidelberg, Germany *
33  * PAN, Krakow, Poland *
34  * *
35  * Redistribution and use in source and binary forms, with or without *
36  * modification, are permitted according to the terms listed in LICENSE *
37  * (http://tmva.sourceforge.net/LICENSE) *
38  **********************************************************************************/
39 
40 /*! \class TMVA::MethodSVM
41 \ingroup TMVA
42 SMO Platt's SVM classifier with Keerthi & Shavade improvements
43 */
44 
45 #include "TMVA/MethodSVM.h"
46 
47 #include "TMVA/Tools.h"
48 #include "TMVA/Timer.h"
49 
50 #include "TMVA/SVWorkingSet.h"
51 
52 #include "TMVA/SVEvent.h"
53 
54 #include "TMVA/SVKernelFunction.h"
55 
56 #include "TMVA/ClassifierFactory.h"
57 #include "TMVA/Configurable.h"
58 #include "TMVA/DataSet.h"
59 #include "TMVA/DataSetInfo.h"
60 #include "TMVA/Event.h"
61 #include "TMVA/IMethod.h"
62 #include "TMVA/MethodBase.h"
63 #include "TMVA/MsgLogger.h"
64 #include "TMVA/Types.h"
65 #include "TMVA/Interval.h"
67 #include "TMVA/Results.h"
69 #include "TMVA/VariableInfo.h"
70 
71 #include "Riostream.h"
72 #include "TFile.h"
73 #include "TVectorD.h"
74 #include "TMath.h"
75 
76 #include <string>
77 
78 using std::vector;
79 using std::string;
80 using std::stringstream;
81 
82 //const Int_t basketsize__ = 1280000;
83 REGISTER_METHOD(SVM)
84 
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// standard constructor
89 
90  TMVA::MethodSVM::MethodSVM( const TString& jobName, const TString& methodTitle, DataSetInfo& theData,
91  const TString& theOption )
92  : MethodBase( jobName, Types::kSVM, methodTitle, theData, theOption)
93  , fCost(0)
94  , fTolerance(0)
95  , fMaxIter(0)
96  , fNSubSets(0)
97  , fBparm(0)
98  , fGamma(0)
99  , fWgSet(0)
100  , fInputData(0)
101  , fSupportVectors(0)
102  , fSVKernelFunction(0)
103  , fMinVars(0)
104  , fMaxVars(0)
105  , fDoubleSigmaSquared(0)
106  , fOrder(0)
107  , fTheta(0)
108  , fKappa(0)
109  , fMult(0)
110  ,fNumVars(0)
111  , fGammas("")
112  , fGammaList("")
113  , fDataSize(0)
114  , fLoss(0)
115 {
116  fVarNames.clear();
117  fNumVars = theData.GetVariableInfos().size();
118  for( int i=0; i<fNumVars; i++){
119  fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
120  }
121 }
122 
123 ////////////////////////////////////////////////////////////////////////////////
124 /// constructor from weight file
125 
126 TMVA::MethodSVM::MethodSVM( DataSetInfo& theData, const TString& theWeightFile)
127  : MethodBase( Types::kSVM, theData, theWeightFile)
128  , fCost(0)
129  , fTolerance(0)
130  , fMaxIter(0)
131  , fNSubSets(0)
132  , fBparm(0)
133  , fGamma(0)
134  , fWgSet(0)
135  , fInputData(0)
136  , fSupportVectors(0)
137  , fSVKernelFunction(0)
138  , fMinVars(0)
139  , fMaxVars(0)
140  , fDoubleSigmaSquared(0)
141  , fOrder(0)
142  , fTheta(0)
143  , fKappa(0)
144  , fMult(0)
145  , fNumVars(0)
146  , fGammas("")
147  , fGammaList("")
148  , fDataSize(0)
149  , fLoss(0)
150 {
151  fVarNames.clear();
152  fNumVars = theData.GetVariableInfos().size();
153  for( int i=0;i<fNumVars; i++){
154  fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
155  }
156 }
157 
158 ////////////////////////////////////////////////////////////////////////////////
159 /// destructor
160 
162 {
163  fSupportVectors->clear();
164  for (UInt_t i=0; i<fInputData->size(); i++) {
165  delete fInputData->at(i);
166  }
167  if (fWgSet !=0) { delete fWgSet; fWgSet=0; }
168  if (fSVKernelFunction !=0 ) { delete fSVKernelFunction; fSVKernelFunction = 0; }
169 }
170 
171 ////////////////////////////////////////////////////////////////////////////////
172 // reset the method, as if it had just been instantiated (forget all training etc.)
173 
175 {
176  // reset the method, as if it had just been instantiated (forget all training etc.)
177  fSupportVectors->clear();
178  for (UInt_t i=0; i<fInputData->size(); i++){
179  delete fInputData->at(i);
180  fInputData->at(i)=0;
181  }
182  fInputData->clear();
183  if (fWgSet !=0) { fWgSet=0; }
184  if (fSVKernelFunction !=0 ) { fSVKernelFunction = 0; }
185  if (Data()){
187  }
188 
189  Log() << kDEBUG << " successfully(?) reset the method " << Endl;
190 }
191 
192 ////////////////////////////////////////////////////////////////////////////////
193 /// SVM can handle classification with 2 classes and regression with one regression-target
194 
196 {
197  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
198  if (type == Types::kRegression && numberTargets == 1) return kTRUE;
199  return kFALSE;
200 }
201 
202 ////////////////////////////////////////////////////////////////////////////////
203 /// default initialisation
204 
206 {
207  // SVM always uses normalised input variables
208  SetNormalised( kTRUE );
209 
210  // Helge: do not book a event vector of given size but rather fill the vector
211  // later with pus_back. Anyway, this is NOT what is time consuming in
212  // SVM and it allows to skip totally events with weights == 0 ;)
213  fInputData = new std::vector<TMVA::SVEvent*>(0);
214  fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
215 }
216 
217 ////////////////////////////////////////////////////////////////////////////////
218 /// declare options available for this method
219 
221 {
222  DeclareOptionRef( fTheKernel = "RBF", "Kernel", "Pick which kernel ( RBF or MultiGauss )");
223  // for gaussian kernel parameter(s)
224  DeclareOptionRef( fGamma = 1., "Gamma", "RBF kernel parameter: Gamma (size of the Kernel)");
225  // for polynomial kernel parameter(s)
226  DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
227  DeclareOptionRef( fTheta = 1., "Theta", "Polynomial Kernel parameter: polynomial theta");
228  // for multi-gaussian kernel parameter(s)
229  DeclareOptionRef( fGammas = "", "GammaList", "MultiGauss parameters" );
230 
231  // for range and step number for kernel parameter optimisation
232  DeclareOptionRef( fTune = "All", "Tune", "Tune Parameters");
233  // for list of kernels to be used with product or sum kernel
234  DeclareOptionRef( fMultiKernels = "None", "KernelList", "Sum or product of kernels");
235  DeclareOptionRef( fLoss = "hinge", "Loss", "Loss function");
236 
237  DeclareOptionRef( fCost, "C", "Cost parameter" );
238  if (DoRegression()) {
239  fCost = 0.002;
240  }else{
241  fCost = 1.;
242  }
243  DeclareOptionRef( fTolerance = 0.01, "Tol", "Tolerance parameter" ); //should be fixed
244  DeclareOptionRef( fMaxIter = 1000, "MaxIter", "Maximum number of training loops" );
245 
246 }
247 
248 ////////////////////////////////////////////////////////////////////////////////
249 /// options that are used ONLY for the READER to ensure backward compatibility
250 
252 {
254  DeclareOptionRef( fNSubSets = 1, "NSubSets", "Number of training subsets" );
255  DeclareOptionRef( fTheKernel = "Gauss", "Kernel", "Uses kernel function");
256  // for gaussian kernel parameter(s)
257  DeclareOptionRef( fDoubleSigmaSquared = 2., "Sigma", "Kernel parameter: sigma");
258  // for polynomial kernel parameter(s)
259  DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
260  // for sigmoid kernel parameters
261  DeclareOptionRef( fTheta = 1., "Theta", "Sigmoid Kernel parameter: theta");
262  DeclareOptionRef( fKappa = 1., "Kappa", "Sigmoid Kernel parameter: kappa");
263 }
264 
265 ////////////////////////////////////////////////////////////////////////////////
266 /// option post processing (if necessary)
267 
269 {
271  Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
272  << GetMethodTypeName()
273  << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
274  << Endl;
275  }
276 }
277 
278 ////////////////////////////////////////////////////////////////////////////////
279 /// Train SVM
280 
282 {
285 
286  Log() << kDEBUG << "Create event vector"<< Endl;
287 
288  fDataSize = Data()->GetNEvents();
289  Int_t nSignal = Data()->GetNEvtSigTrain();
290  Int_t nBackground = Data()->GetNEvtBkgdTrain();
291  Double_t CSig;
292  Double_t CBkg;
293 
294  // Use number of signal and background from above to weight the cost parameter
295  // so that the training is not biased towards the larger dataset when the signal
296  // and background samples are significantly different sizes.
297  if(nSignal < nBackground){
298  CSig = fCost;
299  CBkg = CSig*((double)nSignal/nBackground);
300  }
301  else{
302  CBkg = fCost;
303  CSig = CBkg*((double)nSignal/nBackground);
304  }
305 
306  // Loop over events and assign the correct cost parameter.
307  for (Int_t ievnt=0; ievnt<Data()->GetNEvents(); ievnt++){
308  if (GetEvent(ievnt)->GetWeight() != 0){
309  if(DataInfo().IsSignal(GetEvent(ievnt))){
310  fInputData->push_back(new SVEvent(GetEvent(ievnt), CSig, DataInfo().IsSignal\
311  (GetEvent(ievnt))));
312  }
313  else{
314  fInputData->push_back(new SVEvent(GetEvent(ievnt), CBkg, DataInfo().IsSignal\
315  (GetEvent(ievnt))));
316  }
317  }
318  }
319 
320  // Set the correct kernel function.
321  // Here we only use valid Mercer kernels. In the literature some people have reported reasonable
322  // results using Sigmoid kernel function however that is not a valid Mercer kernel and is not used here.
323  if( fTheKernel == "RBF"){
325  }
326  else if( fTheKernel == "MultiGauss" ){
327  if(fGammas!=""){
330  }
331  else{
332  if(fmGamma.size()!=0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
333  else{
334  for(Int_t ngammas=0; ngammas<fNumVars; ++ngammas){
335  fmGamma.push_back(1.0);
336  }
338  }
339  }
341  }
342  else if( fTheKernel == "Polynomial" ){
344  }
345  else if( fTheKernel == "Prod" ){
346  if(fGammas!=""){
349  }
350  else{
351  if(fmGamma.size()!=0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
352  }
354  }
355  else if( fTheKernel == "Sum" ){
356  if(fGammas!=""){
359  }
360  else{
361  if(fmGamma.size()!=0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
362  }
364  }
365  else {
366  Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
367  exit(1);
368  }
369 
370  Log()<< kINFO << "Building SVM Working Set...with "<<fInputData->size()<<" event instances"<< Endl;
371  Timer bldwstime( GetName());
373  Log() << kINFO <<"Elapsed time for Working Set build: "<< bldwstime.GetElapsedTime()<<Endl;
374 
375  // timing
376  Timer timer( GetName() );
377  Log() << kINFO << "Sorry, no computing time forecast available for SVM, please wait ..." << Endl;
378 
380 
382 
383  Log() << kINFO << "Elapsed time: " << timer.GetElapsedTime()
384  << " " << Endl;
385 
386  fBparm = fWgSet->GetBpar();
388  delete fWgSet;
389  fWgSet=0;
390 
393 }
394 
395 ////////////////////////////////////////////////////////////////////////////////
396 /// write configuration to xml file
397 
398 void TMVA::MethodSVM::AddWeightsXMLTo( void* parent ) const
399 {
400  void* wght = gTools().AddChild(parent, "Weights");
401  gTools().AddAttr(wght,"fBparm",fBparm);
402  gTools().AddAttr(wght,"fGamma",fGamma);
403  gTools().AddAttr(wght,"fGammaList",fGammaList);
404  gTools().AddAttr(wght,"fTheta",fTheta);
405  gTools().AddAttr(wght,"fOrder",fOrder);
406  gTools().AddAttr(wght,"NSupVec",fSupportVectors->size());
407 
408  for (std::vector<TMVA::SVEvent*>::iterator veciter=fSupportVectors->begin();
409  veciter!=fSupportVectors->end() ; ++veciter ) {
410  TVectorD temp(GetNvar()+4);
411  temp[0] = (*veciter)->GetNs();
412  temp[1] = (*veciter)->GetTypeFlag();
413  temp[2] = (*veciter)->GetAlpha();
414  temp[3] = (*veciter)->GetAlpha_p();
415  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
416  temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
417  gTools().WriteTVectorDToXML(wght,"SupportVector",&temp);
418  }
419  // write max/min data values
420  void* maxnode = gTools().AddChild(wght, "Maxima");
421  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
422  gTools().AddAttr(maxnode, "Var"+gTools().StringFromInt(ivar), GetXmax(ivar));
423  void* minnode = gTools().AddChild(wght, "Minima");
424  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
425  gTools().AddAttr(minnode, "Var"+gTools().StringFromInt(ivar), GetXmin(ivar));
426 }
427 
428 ////////////////////////////////////////////////////////////////////////////////
429 
431 {
432  gTools().ReadAttr( wghtnode, "fBparm",fBparm );
433  gTools().ReadAttr( wghtnode, "fGamma",fGamma);
434  gTools().ReadAttr( wghtnode, "fGammaList",fGammaList);
435  gTools().ReadAttr( wghtnode, "fOrder",fOrder);
436  gTools().ReadAttr( wghtnode, "fTheta",fTheta);
437  UInt_t fNsupv=0;
438  gTools().ReadAttr( wghtnode, "NSupVec",fNsupv );
439 
440  Float_t alpha=0.;
441  Float_t alpha_p = 0.;
442 
443  Int_t typeFlag=-1;
444  // UInt_t ns = 0;
445  std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());
446 
447  if (fMaxVars!=0) delete fMaxVars;
448  fMaxVars = new TVectorD( GetNvar() );
449  if (fMinVars!=0) delete fMinVars;
450  fMinVars = new TVectorD( GetNvar() );
451  if (fSupportVectors!=0) {
452  for (vector< SVEvent* >::iterator it = fSupportVectors->begin(); it!=fSupportVectors->end(); ++it)
453  delete *it;
454  delete fSupportVectors;
455  }
456  fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
457  void* supportvectornode = gTools().GetChild(wghtnode);
458  for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
459  TVectorD temp(GetNvar()+4);
460  gTools().ReadTVectorDFromXML(supportvectornode,"SupportVector",&temp);
461  // ns=(UInt_t)temp[0];
462  typeFlag=(int)temp[1];
463  alpha=temp[2];
464  alpha_p=temp[3];
465  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) (*svector)[ivar]=temp[ivar+4];
466 
467  fSupportVectors->push_back(new SVEvent(svector,alpha,alpha_p,typeFlag));
468  supportvectornode = gTools().GetNextChild(supportvectornode);
469  }
470 
471  void* maxminnode = supportvectornode;
472  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
473  gTools().ReadAttr( maxminnode,"Var"+gTools().StringFromInt(ivar),(*fMaxVars)[ivar]);
474  maxminnode = gTools().GetNextChild(maxminnode);
475  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
476  gTools().ReadAttr( maxminnode,"Var"+gTools().StringFromInt(ivar),(*fMinVars)[ivar]);
477  if (fSVKernelFunction!=0) delete fSVKernelFunction;
478  if( fTheKernel == "RBF" ){
480  }
481  else if( fTheKernel == "MultiGauss" ){
484  }
485  else if( fTheKernel == "Polynomial" ){
487  }
488  else if( fTheKernel == "Prod" ){
491  }
492  else if( fTheKernel == "Sum" ){
495  }
496  else {
497  Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
498  exit(1);
499  }
500  delete svector;
501 }
502 
503 ////////////////////////////////////////////////////////////////////////////////
504 ///TODO write IT
505 /// write training sample (TTree) to file
506 
508 {
509 }
510 
511 ////////////////////////////////////////////////////////////////////////////////
512 
513 void TMVA::MethodSVM::ReadWeightsFromStream( std::istream& istr )
514 {
515  if (fSupportVectors !=0) { delete fSupportVectors; fSupportVectors = 0;}
516  fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
517 
518  // read configuration from input stream
519  istr >> fBparm;
520 
521  UInt_t fNsupv;
522  // coverity[tainted_data_argument]
523  istr >> fNsupv;
524  fSupportVectors->reserve(fNsupv);
525 
526  Float_t typeTalpha=0.;
527  Float_t alpha=0.;
528  Int_t typeFlag=-1;
529  UInt_t ns = 0;
530  std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());
531 
532  fMaxVars = new TVectorD( GetNvar() );
533  fMinVars = new TVectorD( GetNvar() );
534 
535  for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
536  istr>>ns;
537  istr>>typeTalpha;
538  typeFlag = typeTalpha<0?-1:1;
539  alpha = typeTalpha<0?-typeTalpha:typeTalpha;
540  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> svector->at(ivar);
541 
542  fSupportVectors->push_back(new SVEvent(svector,alpha,typeFlag,ns));
543  }
544 
545  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMaxVars)[ivar];
546 
547  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMinVars)[ivar];
548 
549  delete fSVKernelFunction;
550  if (fTheKernel == "Gauss" ) {
552  }
553  else {
555  if(fTheKernel == "Linear") k = SVKernelFunction::kLinear;
556  else if (fTheKernel == "Polynomial") k = SVKernelFunction::kPolynomial;
557  else if (fTheKernel == "Sigmoid" ) k = SVKernelFunction::kSigmoidal;
558  else {
559  Log() << kFATAL <<"Unknown kernel function found in weight file!" << Endl;
560  }
563  }
564  delete svector;
565 }
566 
567 ////////////////////////////////////////////////////////////////////////////////
568 /// TODO write IT
569 
571 {
572 }
573 
574 ////////////////////////////////////////////////////////////////////////////////
575 /// returns MVA value for given event
576 
578 {
579  Double_t myMVA = 0;
580 
581  // TODO: avoid creation of a new SVEvent every time (Joerg)
582  SVEvent* ev = new SVEvent( GetEvent(), 0. ); // check for specificators
583 
584  for (UInt_t ievt = 0; ievt < fSupportVectors->size() ; ievt++) {
585  myMVA += ( fSupportVectors->at(ievt)->GetAlpha()
586  * fSupportVectors->at(ievt)->GetTypeFlag()
587  * fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
588  }
589 
590  delete ev;
591 
592  myMVA -= fBparm;
593 
594  // cannot determine error
595  NoErrorCalc(err, errUpper);
596 
597  // 08/12/09: changed sign here to make results agree with convention signal=1
598  return 1.0/(1.0 + TMath::Exp(myMVA));
599 }
600 ////////////////////////////////////////////////////////////////////////////////
601 
602 const std::vector<Float_t>& TMVA::MethodSVM::GetRegressionValues()
603 {
604  if( fRegressionReturnVal == NULL )
605  fRegressionReturnVal = new std::vector<Float_t>();
606  fRegressionReturnVal->clear();
607 
608  Double_t myMVA = 0;
609 
610  const Event *baseev = GetEvent();
611  SVEvent* ev = new SVEvent( baseev,0. ); //check for specificators
612 
613  for (UInt_t ievt = 0; ievt < fSupportVectors->size() ; ievt++) {
614  myMVA += ( fSupportVectors->at(ievt)->GetDeltaAlpha()
615  *fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
616  }
617  myMVA += fBparm;
618  Event * evT = new Event(*baseev);
619  evT->SetTarget(0,myMVA);
620 
621  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
622 
623  fRegressionReturnVal->push_back(evT2->GetTarget(0));
624 
625  delete evT;
626 
627  delete ev;
628 
629  return *fRegressionReturnVal;
630 }
631 
632 ////////////////////////////////////////////////////////////////////////////////
633 /// write specific classifier response
634 
635 void TMVA::MethodSVM::MakeClassSpecific( std::ostream& fout, const TString& className ) const
636 {
637  const int fNsupv = fSupportVectors->size();
638  fout << " // not implemented for class: \"" << className << "\"" << std::endl;
639  fout << " float fBparameter;" << std::endl;
640  fout << " int fNOfSuppVec;" << std::endl;
641  fout << " static float fAllSuppVectors[][" << fNsupv << "];" << std::endl;
642  fout << " static float fAlphaTypeCoef[" << fNsupv << "];" << std::endl;
643  fout << std::endl;
644  fout << " // Kernel parameter(s) " << std::endl;
645  fout << " float fGamma;" << std::endl;
646  fout << "};" << std::endl;
647  fout << "" << std::endl;
648 
649  //Initialize function definition
650  fout << "inline void " << className << "::Initialize() " << std::endl;
651  fout << "{" << std::endl;
652  fout << " fBparameter = " << fBparm << ";" << std::endl;
653  fout << " fNOfSuppVec = " << fNsupv << ";" << std::endl;
654  fout << " fGamma = " << fGamma << ";" <<std::endl;
655  fout << "}" << std::endl;
656  fout << std::endl;
657 
658  // GetMvaValue__ function definition
659  fout << "inline double " << className << "::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
660  fout << "{" << std::endl;
661  fout << " double mvaval = 0; " << std::endl;
662  fout << " double temp = 0; " << std::endl;
663  fout << std::endl;
664  fout << " for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
665  fout << " temp = 0;" << std::endl;
666  fout << " for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
667 
668  fout << " temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar]) " << std::endl;
669  fout << " * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
670  fout << " }" << std::endl;
671  fout << " mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;
672 
673  fout << " }" << std::endl;
674  fout << " mvaval -= fBparameter;" << std::endl;
675  fout << " return 1./(1. + exp(mvaval));" << std::endl;
676  fout << "}" << std::endl;
677  fout << "// Clean up" << std::endl;
678  fout << "inline void " << className << "::Clear() " << std::endl;
679  fout << "{" << std::endl;
680  fout << " // nothing to clear " << std::endl;
681  fout << "}" << std::endl;
682  fout << "" << std::endl;
683 
684  // define support vectors
685  fout << "float " << className << "::fAlphaTypeCoef[] =" << std::endl;
686  fout << "{ ";
687  for (Int_t isv = 0; isv < fNsupv; isv++) {
688  fout << fSupportVectors->at(isv)->GetDeltaAlpha() * fSupportVectors->at(isv)->GetTypeFlag();
689  if (isv < fNsupv-1) fout << ", ";
690  }
691  fout << " };" << std::endl << std::endl;
692 
693  fout << "float " << className << "::fAllSuppVectors[][" << fNsupv << "] =" << std::endl;
694  fout << "{";
695  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
696  fout << std::endl;
697  fout << " { ";
698  for (Int_t isv = 0; isv < fNsupv; isv++){
699  fout << fSupportVectors->at(isv)->GetDataVector()->at(ivar);
700  if (isv < fNsupv-1) fout << ", ";
701  }
702  fout << " }";
703  if (ivar < GetNvar()-1) fout << ", " << std::endl;
704  else fout << std::endl;
705  }
706  fout << "};" << std::endl<< std::endl;
707 }
708 
709 ////////////////////////////////////////////////////////////////////////////////
710 /// get help message text
711 ///
712 /// typical length of text line:
713 /// "|--------------------------------------------------------------|"
714 
716 {
717  Log() << Endl;
718  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
719  Log() << Endl;
720  Log() << "The Support Vector Machine (SVM) builds a hyperplane separating" << Endl;
721  Log() << "signal and background events (vectors) using the minimal subset of " << Endl;
722  Log() << "all vectors used for training (support vectors). The extension to" << Endl;
723  Log() << "the non-linear case is performed by mapping input vectors into a " << Endl;
724  Log() << "higher-dimensional feature space in which linear separation is " << Endl;
725  Log() << "possible. The use of the kernel functions thereby eliminates the " << Endl;
726  Log() << "explicit transformation to the feature space. The implemented SVM " << Endl;
727  Log() << "algorithm performs the classification tasks using linear, polynomial, " << Endl;
728  Log() << "Gaussian and sigmoidal kernel functions. The Gaussian kernel allows " << Endl;
729  Log() << "to apply any discriminant shape in the input space." << Endl;
730  Log() << Endl;
731  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
732  Log() << Endl;
733  Log() << "SVM is a general purpose non-linear classification method, which " << Endl;
734  Log() << "does not require data preprocessing like decorrelation or Principal " << Endl;
735  Log() << "Component Analysis. It generalises quite well and can handle analyses " << Endl;
736  Log() << "with large numbers of input variables." << Endl;
737  Log() << Endl;
738  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
739  Log() << Endl;
740  Log() << "Optimal performance requires primarily a proper choice of the kernel " << Endl;
741  Log() << "parameters (the width \"Sigma\" in case of Gaussian kernel) and the" << Endl;
742  Log() << "cost parameter \"C\". The user must optimise them empirically by running" << Endl;
743  Log() << "SVM several times with different parameter sets. The time needed for " << Endl;
744  Log() << "each evaluation scales like the square of the number of training " << Endl;
745  Log() << "events so that a coarse preliminary tuning should be performed on " << Endl;
746  Log() << "reduced data sets." << Endl;
747 }
748 
749 ////////////////////////////////////////////////////////////////////////////////
750 /// Optimize Tuning Parameters
751 /// This is used to optimise the kernel function parameters and cost. All kernel parameters
752 /// are optimised by default with default ranges, however the parameters to be optimised can
753 /// be set when booking the method with the option Tune.
754 ///
755 /// Example:
756 ///
757 /// "Tune=Gamma[0.01;1.0;100]" would only tune the RBF Gamma between 0.01 and 1.0
758 /// with 100 steps.
759 
760 std::map<TString,Double_t> TMVA::MethodSVM::OptimizeTuningParameters(TString fomType, TString fitType)
761 {
762  // Call the Optimizer with the set of kernel parameters and ranges that are meant to be tuned.
763  std::map< TString,std::vector<Double_t> > optVars;
764  // Get parameters and options specified in booking of method.
765  if(fTune != "All"){
766  optVars= GetTuningOptions();
767  }
768  std::map< TString,std::vector<Double_t> >::iterator iter;
769  // Fill all the tuning parameters that should be optimized into a map
770  std::map<TString,TMVA::Interval*> tuneParameters;
771  std::map<TString,Double_t> tunedParameters;
772  // Note: the 3rd parameter in the interval is the "number of bins", NOT the stepsize!!
773  // The actual values are always read from the middle of the bins.
774  Log() << kINFO << "Using the " << fTheKernel << " kernel." << Endl;
775  // Setup map of parameters based on the specified options or defaults.
776  if( fTheKernel == "RBF" ){
777  if(fTune == "All"){
778  tuneParameters.insert(std::pair<TString,Interval*>("Gamma",new Interval(0.01,1.,100)));
779  tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
780  }
781  else{
782  for(iter=optVars.begin(); iter!=optVars.end(); iter++){
783  if( iter->first == "Gamma" || iter->first == "C"){
784  tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
785  }
786  else{
787  Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
788  exit(1);
789  }
790  }
791  }
792  }
793  else if( fTheKernel == "Polynomial" ){
794  if (fTune == "All"){
795  tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
796  tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.01,1.,100)));
797  tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
798  }
799  else{
800  for(iter=optVars.begin(); iter!=optVars.end(); iter++){
801  if( iter->first == "Theta" || iter->first == "C"){
802  tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
803  }
804  else if( iter->first == "Order"){
805  tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
806  }
807  else{
808  Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
809  exit(1);
810  }
811  }
812  }
813  }
814  else if( fTheKernel == "MultiGauss" ){
815  if (fTune == "All"){
816  for(int i=0; i<fNumVars; i++){
817  stringstream s;
818  s << fVarNames.at(i);
819  string str = "Gamma_" + s.str();
820  tuneParameters.insert(std::pair<TString,Interval*>(str,new Interval(0.01,1.,100)));
821  }
822  tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
823  } else {
824  for(iter=optVars.begin(); iter!=optVars.end(); iter++){
825  if( iter->first == "GammaList"){
826  for(int j=0; j<fNumVars; j++){
827  stringstream s;
828  s << fVarNames.at(j);
829  string str = "Gamma_" + s.str();
830  tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
831  }
832  }
833  else if( iter->first == "C"){
834  tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
835  }
836  else{
837  Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
838  exit(1);
839  }
840  }
841  }
842  }
843  else if( fTheKernel == "Prod" ){
844  std::stringstream tempstring(fMultiKernels);
845  std::string value;
846  while (std::getline(tempstring,value,'*')){
847  if(value == "RBF"){
848  tuneParameters.insert(std::pair<TString,Interval*>("Gamma",new Interval(0.01,1.,100)));
849  }
850  else if(value == "MultiGauss"){
851  for(int i=0; i<fNumVars; i++){
852  stringstream s;
853  s << fVarNames.at(i);
854  string str = "Gamma_" + s.str();
855  tuneParameters.insert(std::pair<TString,Interval*>(str,new Interval(0.01,1.,100)));
856  }
857  }
858  else if(value == "Polynomial"){
859  tuneParameters.insert(std::pair<TString,Interval*>("Order",new Interval(1,10,10)));
860  tuneParameters.insert(std::pair<TString,Interval*>("Theta",new Interval(0.0,1.0,101)));
861  }
862  else {
863  Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
864  exit(1);
865  }
866  }
867  tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
868  }
869  else if( fTheKernel == "Sum" ){
870  std::stringstream tempstring(fMultiKernels);
871  std::string value;
872  while (std::getline(tempstring,value,'+')){
873  if(value == "RBF"){
874  tuneParameters.insert(std::pair<TString,Interval*>("Gamma",new Interval(0.01,1.,100)));
875  }
876  else if(value == "MultiGauss"){
877  for(int i=0; i<fNumVars; i++){
878  stringstream s;
879  s << fVarNames.at(i);
880  string str = "Gamma_" + s.str();
881  tuneParameters.insert(std::pair<TString,Interval*>(str,new Interval(0.01,1.,100)));
882  }
883  }
884  else if(value == "Polynomial"){
885  tuneParameters.insert(std::pair<TString,Interval*>("Order",new Interval(1,10,10)));
886  tuneParameters.insert(std::pair<TString,Interval*>("Theta",new Interval(0.0,1.0,101)));
887  }
888  else {
889  Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
890  exit(1);
891  }
892  }
893  tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
894  }
895  else {
896  Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
897  exit(1);
898  }
899  Log() << kINFO << " the following SVM parameters will be tuned on the respective *grid*\n" << Endl;
900  std::map<TString,TMVA::Interval*>::iterator it;
901  for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
902  Log() << kWARNING << it->first <<Endl;
903  std::ostringstream oss;
904  (it->second)->Print(oss);
905  Log()<<oss.str();
906  Log()<<Endl;
907  }
908  OptimizeConfigParameters optimize(this, tuneParameters, fomType, fitType);
909  tunedParameters=optimize.optimize();
910 
911  return tunedParameters;
912 
913 }
914 
915 ////////////////////////////////////////////////////////////////////////////////
916 /// Set the tuning parameters according to the argument
917 void TMVA::MethodSVM::SetTuneParameters(std::map<TString,Double_t> tuneParameters)
918 {
919  std::map<TString,Double_t>::iterator it;
920  if( fTheKernel == "RBF" ){
921  for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
922  Log() << kWARNING << it->first << " = " << it->second << Endl;
923  if (it->first == "Gamma"){
924  SetGamma (it->second);
925  }
926  else if(it->first == "C"){
927  SetCost (it->second);
928  }
929  else {
930  Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
931  }
932  }
933  }
934  else if( fTheKernel == "MultiGauss" ){
935  fmGamma.clear();
936  for(int i=0; i<fNumVars; i++){
937  stringstream s;
938  s << fVarNames.at(i);
939  string str = "Gamma_" + s.str();
940  Log() << kWARNING << tuneParameters.find(str)->first << " = " << tuneParameters.find(str)->second << Endl;
941  fmGamma.push_back(tuneParameters.find(str)->second);
942  }
943  for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
944  if (it->first == "C"){
945  Log() << kWARNING << it->first << " = " << it->second << Endl;
946  SetCost(it->second);
947  break;
948  }
949  }
950  }
951  else if( fTheKernel == "Polynomial" ){
952  for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
953  Log() << kWARNING << it->first << " = " << it->second << Endl;
954  if (it->first == "Order"){
955  SetOrder(it->second);
956  }
957  else if (it->first == "Theta"){
958  SetTheta(it->second);
959  }
960  else if(it->first == "C"){ SetCost (it->second);
961  }
962  else if(it->first == "Mult"){
963  SetMult(it->second);
964  }
965  else{
966  Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
967  }
968  }
969  }
970  else if( fTheKernel == "Prod" || fTheKernel == "Sum"){
971  fmGamma.clear();
972  for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
973  bool foundParam = false;
974  Log() << kWARNING << it->first << " = " << it->second << Endl;
975  for(int i=0; i<fNumVars; i++){
976  stringstream s;
977  s << fVarNames.at(i);
978  string str = "Gamma_" + s.str();
979  if(it->first == str){
980  fmGamma.push_back(it->second);
981  foundParam = true;
982  }
983  }
984  if (it->first == "Gamma"){
985  SetGamma (it->second);
986  foundParam = true;
987  }
988  else if (it->first == "Order"){
989  SetOrder (it->second);
990  foundParam = true;
991  }
992  else if (it->first == "Theta"){
993  SetTheta (it->second);
994  foundParam = true;
995  }
996  else if (it->first == "C"){ SetCost (it->second);
997  SetCost (it->second);
998  foundParam = true;
999  }
1000  else{
1001  if(!foundParam){
1002  Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
1003  }
1004  }
1005  }
1006  }
1007  else {
1008  Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
1009  exit(1);
1010  }
1011 }
1012 
1013 ////////////////////////////////////////////////////////////////////////////////
1014 /// Takes as input a string of values for multigaussian gammas and splits it, filling the
1015 /// gamma vector required by the SVKernelFunction. Example: "GammaList=0.1,0.2,0.3" would
1016 /// make a vector with Gammas of 0.1,0.2 & 0.3 corresponding to input variables 1,2 & 3
1017 /// respectively.
1018 void TMVA::MethodSVM::SetMGamma(std::string & mg){
1019  std::stringstream tempstring(mg);
1020  Float_t value;
1021  while (tempstring >> value){
1022  fmGamma.push_back(value);
1023 
1024  if (tempstring.peek() == ','){
1025  tempstring.ignore();
1026  }
1027  }
1028 }
1029 
1030 ////////////////////////////////////////////////////////////////////////////////
1031 /// Produces GammaList string for multigaussian kernel to be written to xml file
1032 void TMVA::MethodSVM::GetMGamma(const std::vector<float> & gammas){
1033  std::ostringstream tempstring;
1034  for(UInt_t i = 0; i<gammas.size(); ++i){
1035  tempstring << gammas.at(i);
1036  if(i!=(gammas.size()-1)){
1037  tempstring << ",";
1038  }
1039  }
1040  fGammaList= tempstring.str();
1041 }
1042 
1043 ////////////////////////////////////////////////////////////////////////////////
1044 /// MakeKernelList
1045 /// Function providing string manipulation for product or sum of kernels functions
1046 /// to take list of kernels specified in the booking of the method and provide a vector
1047 /// of SV kernels to iterate over in SVKernelFunction.
1048 ///
1049 /// Example:
1050 ///
1051 /// "KernelList=RBF*Polynomial" would use a product of the RBF and Polynomial
1052 /// kernels.
1053 
1054 std::vector<TMVA::SVKernelFunction::EKernelType> TMVA::MethodSVM::MakeKernelList(std::string multiKernels, TString kernel)
1055 {
1056  std::vector<TMVA::SVKernelFunction::EKernelType> kernelsList;
1057  std::stringstream tempstring(multiKernels);
1058  std::string value;
1059  if(kernel=="Prod"){
1060  while (std::getline(tempstring,value,'*')){
1061  if(value == "RBF"){ kernelsList.push_back(SVKernelFunction::kRBF);}
1062  else if(value == "MultiGauss"){
1063  kernelsList.push_back(SVKernelFunction::kMultiGauss);
1064  if(fGammas!=""){
1065  SetMGamma(fGammas);
1066  }
1067  }
1068  else if(value == "Polynomial"){ kernelsList.push_back(SVKernelFunction::kPolynomial);}
1069  else {
1070  Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
1071  exit(1);
1072  }
1073  }
1074  }
1075  else if(kernel=="Sum"){
1076  while (std::getline(tempstring,value,'+')){
1077  if(value == "RBF"){ kernelsList.push_back(SVKernelFunction::kRBF);}
1078  else if(value == "MultiGauss"){
1079  kernelsList.push_back(SVKernelFunction::kMultiGauss);
1080  if(fGammas!=""){
1081  SetMGamma(fGammas);
1082  }
1083  }
1084  else if(value == "Polynomial"){ kernelsList.push_back(SVKernelFunction::kPolynomial);}
1085  else {
1086  Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
1087  exit(1);
1088  }
1089  }
1090  }
1091  else {
1092  Log() << kWARNING << "Unable to split MultiKernels. Delimiters */+ required." << Endl;
1093  exit(1);
1094  }
1095  return kernelsList;
1096 }
1097 
1098 ////////////////////////////////////////////////////////////////////////////////
1099 /// GetTuningOptions
1100 /// Function to allow for ranges and number of steps (for scan) when optimising kernel
1101 /// function parameters. Specified when booking the method after the parameter to be
1102 /// optimised between square brackets with each value separated by ;, the first value
1103 /// is the lower limit, the second the upper limit and the third is the number of steps.
1104 /// Example: "Tune=Gamma[0.01;1.0;100]" would only tune the RBF Gamma between 0.01 and
1105 /// 100 steps.
1106 std::map< TString,std::vector<Double_t> > TMVA::MethodSVM::GetTuningOptions()
1107 {
1108  std::map< TString,std::vector<Double_t> > optVars;
1109  std::stringstream tempstring(fTune);
1110  std::string value;
1111  while (std::getline(tempstring,value,',')){
1112  unsigned first = value.find('[')+1;
1113  unsigned last = value.find_last_of(']');
1114  std::string optParam = value.substr(0,first-1);
1115  std::stringstream strNew (value.substr(first,last-first));
1116  Double_t optInterval;
1117  std::vector<Double_t> tempVec;
1118  UInt_t i = 0;
1119  while (strNew >> optInterval){
1120  tempVec.push_back(optInterval);
1121  if (strNew.peek() == ';'){
1122  strNew.ignore();
1123  }
1124  ++i;
1125  }
1126  if(i != 3 && i == tempVec.size()){
1127  if(optParam == "C" || optParam == "Gamma" || optParam == "GammaList" || optParam == "Theta"){
1128  switch(i){
1129  case 0:
1130  tempVec.push_back(0.01);
1131  case 1:
1132  tempVec.push_back(1.);
1133  case 2:
1134  tempVec.push_back(100);
1135  }
1136  }
1137  else if(optParam == "Order"){
1138  switch(i){
1139  case 0:
1140  tempVec.push_back(1);
1141  case 1:
1142  tempVec.push_back(10);
1143  case 2:
1144  tempVec.push_back(10);
1145  }
1146  }
1147  else{
1148  Log() << kWARNING << optParam << " is not a recognised tuneable parameter." << Endl;
1149  exit(1);
1150  }
1151  }
1152  optVars.insert(std::pair<TString,std::vector<Double_t> >(optParam,tempVec));
1153  }
1154  return optVars;
1155 }
1156 
1157 ////////////////////////////////////////////////////////////////////////////////
1158 /// getLoss
1159 /// Calculates loss for testing dataset. The loss function can be specified when
1160 /// booking the method, otherwise defaults to hinge loss. Currently not used however
1161 /// is accesible if required.
1162 
1164  Double_t loss = 0.0;
1165  Double_t sumW = 0.0;
1166  Double_t temp = 0.0;
1169  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1170  const Event* ev = GetEvent(ievt);
1171  Float_t v = (*mvaRes)[ievt][0];
1172  Float_t w = ev->GetWeight();
1173  if(DataInfo().IsSignal(ev)){
1174  if(lossFunction == "hinge"){
1175  temp += w*(1-v);
1176  }
1177  else if(lossFunction == "exp"){
1178  temp += w*TMath::Exp(-v);
1179  }
1180  else if(lossFunction == "binomial"){
1181  temp += w*TMath::Log(1+TMath::Exp(-2*v));
1182  }
1183  else{
1184  Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
1185  exit(1);
1186  }
1187  }
1188  else{
1189  if(lossFunction == "hinge"){
1190  temp += w*v;
1191  }
1192  else if(lossFunction == "exp"){
1193  temp += w*TMath::Exp(-(1-v));
1194  }
1195  else if(lossFunction == "binomial"){
1196  temp += w*TMath::Log(1+TMath::Exp(-2*(1-v)));
1197  }
1198  else{
1199  Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
1200  exit(1);
1201  }
1202  }
1203  sumW += w;
1204  }
1205  loss = temp/sumW;
1206 
1207  return loss;
1208 }
void Train(void)
Train SVM.
Definition: MethodSVM.cxx:281
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Definition: MethodSVM.cxx:635
std::vector< TMVA::SVKernelFunction::EKernelType > MakeKernelList(std::string multiKernels, TString kernel)
MakeKernelList Function providing string manipulation for product or sum of kernels functions to take...
Definition: MethodSVM.cxx:1054
Float_t fTolerance
Definition: MethodSVM.h:130
TString fTheKernel
Definition: MethodSVM.h:144
void SetMult(Double_t m)
Definition: MethodSVM.h:109
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
Definition: MethodSVM.cxx:251
Kernel for Support Vector Machine.
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Float_t fNumVars
Definition: MethodSVM.h:151
Singleton class for Global types used by TMVA.
Definition: Types.h:73
long long Long64_t
Definition: RtypesCore.h:69
Float_t fDoubleSigmaSquared
Definition: MethodSVM.h:145
Double_t Log(Double_t x)
Definition: TMath.h:649
float Float_t
Definition: RtypesCore.h:53
std::map< TString, std::vector< Double_t > > GetTuningOptions()
GetTuningOptions Function to allow for ranges and number of steps (for scan) when optimising kernel f...
Definition: MethodSVM.cxx:1106
UInt_t GetNvar() const
Definition: MethodBase.h:328
UShort_t fNSubSets
Definition: MethodSVM.h:132
MsgLogger & Log() const
Definition: Configurable.h:122
std::string fTune
Definition: MethodSVM.h:155
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
EAnalysisType
Definition: Types.h:125
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
std::string fMultiKernels
Definition: MethodSVM.h:156
void Train(UInt_t nIter=1000)
train the SVM
bool fExitFromTraining
Definition: MethodBase.h:431
std::vector< TMVA::SVEvent * > * fInputData
Definition: MethodSVM.h:136
Basic string class.
Definition: TString.h:129
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:378
Float_t fTheta
Definition: MethodSVM.h:147
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::string fGammas
Definition: MethodSVM.h:153
void AddWeightsXMLTo(void *parent) const
write configuration to xml file
Definition: MethodSVM.cxx:398
#define NULL
Definition: RtypesCore.h:88
void ProcessOptions()
option post processing (if necessary)
Definition: MethodSVM.cxx:268
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:308
void setCompatibilityParams(EKernelType k, UInt_t order, Float_t theta, Float_t kappa)
set old options for compatibility mode
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1135
std::vector< TString > fVarNames
Definition: MethodSVM.h:152
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:422
TVectorD * fMinVars
Definition: MethodSVM.h:140
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
Set the tuning parameters according to the argument.
Definition: MethodSVM.cxx:917
Float_t fCost
Definition: MethodSVM.h:129
SMO Platt&#39;s SVM classifier with Keerthi & Shavade improvements.
Definition: MethodSVM.h:57
Float_t fKappa
Definition: MethodSVM.h:148
TStopwatch timer
Definition: pirndm.C:37
std::vector< Float_t > fmGamma
Definition: MethodSVM.h:150
void SetOrder(Double_t o)
Definition: MethodSVM.h:106
const Event * GetEvent() const
Definition: MethodBase.h:733
DataSet * Data() const
Definition: MethodBase.h:393
void GetMGamma(const std::vector< float > &gammas)
Produces GammaList string for multigaussian kernel to be written to xml file.
Definition: MethodSVM.cxx:1032
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
TVectorD * fMaxVars
Definition: MethodSVM.h:141
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:340
DataSetInfo & DataInfo() const
Definition: MethodBase.h:394
Bool_t DoRegression() const
Definition: MethodBase.h:422
Class that contains all the data information.
Definition: DataSetInfo.h:60
Float_t fBparm
Definition: MethodSVM.h:133
void DeclareOptions()
declare options available for this method
Definition: MethodSVM.cxx:220
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
TVectorT< Double_t > TVectorD
Definition: TVectorDfwd.h:22
UInt_t fIPyCurrentIter
Definition: MethodBase.h:432
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Definition: TNamed.cxx:119
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:401
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:134
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:341
void SetGamma(Double_t g)
Definition: MethodSVM.h:103
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:97
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodSVM.cxx:430
void ReadWeightsFromStream(std::istream &istr)
Definition: MethodSVM.cxx:513
MethodSVM(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Definition: MethodSVM.cxx:90
SVector< double, 2 > v
Definition: Dict.h:5
const char * GetName() const
Definition: MethodBase.h:318
std::map< TString, Double_t > optimize()
The TMVA::Interval Class.
Definition: Interval.h:61
void Init(void)
default initialisation
Definition: MethodSVM.cxx:205
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Definition: DataSet.cxx:316
UInt_t fIPyMaxIter
Definition: MethodBase.h:432
SVKernelFunction * fSVKernelFunction
Definition: MethodSVM.h:138
void SetTheta(Double_t t)
Definition: MethodSVM.h:107
unsigned int UInt_t
Definition: RtypesCore.h:42
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
TString fLoss
Definition: MethodSVM.h:159
const TString & GetMethodName() const
Definition: MethodBase.h:315
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:290
Tools & gTools()
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="Minuit")
Optimize Tuning Parameters This is used to optimise the kernel function parameters and cost...
Definition: MethodSVM.cxx:760
Event class for Support Vector Machine.
Definition: SVEvent.h:40
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
SVM can handle classification with 2 classes and regression with one regression-target.
Definition: MethodSVM.cxx:195
void SetNormalised(Bool_t norm)
Definition: MethodBase.h:479
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodSVM.cxx:577
std::vector< TMVA::SVEvent * > * fSupportVectors
Definition: MethodSVM.h:137
const Bool_t kFALSE
Definition: RtypesCore.h:92
void WriteWeightsToStream(TFile &fout) const
TODO write IT write training sample (TTree) to file.
Definition: MethodSVM.cxx:507
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:668
Double_t Exp(Double_t x)
Definition: TMath.h:622
#define ClassImp(name)
Definition: Rtypes.h:336
double Double_t
Definition: RtypesCore.h:55
Double_t getLoss(TString lossFunction)
getLoss Calculates loss for testing dataset.
Definition: MethodSVM.cxx:1163
std::vector< TMVA::SVEvent * > * GetSupportVectors()
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:414
int type
Definition: TGX11.cxx:120
void SetCost(Double_t c)
Definition: MethodSVM.h:104
void WriteTVectorDToXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1270
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1173
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
void ExitFromTraining()
Definition: MethodBase.h:446
void GetHelpMessage() const
get help message text
Definition: MethodSVM.cxx:715
Working class for Support Vector Machine.
Definition: SVWorkingSet.h:42
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
IPythonInteractive * fInteractive
Definition: MethodBase.h:430
virtual ~MethodSVM(void)
destructor
Definition: MethodSVM.cxx:161
TString GetMethodTypeName() const
Definition: MethodBase.h:316
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:601
std::string fGammaList
Definition: MethodSVM.h:154
Float_t fGamma
Definition: MethodSVM.h:134
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
Bool_t IsSignal(const Event *ev) const
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:579
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:421
Definition: first.py:1
void ReadTVectorDFromXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1278
UInt_t fMaxIter
Definition: MethodSVM.h:131
const Bool_t kTRUE
Definition: RtypesCore.h:91
void Reset(void)
Definition: MethodSVM.cxx:174
void SetIPythonInteractive(bool *ExitFromTraining, UInt_t *fIPyCurrentIter_)
Definition: SVWorkingSet.h:65
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
void SetMGamma(std::string &mg)
Takes as input a string of values for multigaussian gammas and splits it, filling the gamma vector re...
Definition: MethodSVM.cxx:1018
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:94
SVWorkingSet * fWgSet
Definition: MethodSVM.h:135
Class that is the base-class for a vector of result.
Float_t Evaluate(SVEvent *ev1, SVEvent *ev2)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:829
const std::vector< Float_t > & GetRegressionValues()
Definition: MethodSVM.cxx:602