Logo ROOT  
Reference Guide
MethodSVM.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Marcin Wolter, Andrzej Zemla
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodSVM *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation *
12 * *
13 * Authors (alphabetical): *
14 * Marcin Wolter <Marcin.Wolter@cern.ch> - IFJ PAN, Krakow, Poland *
15 * Andrzej Zemla <azemla@cern.ch> - IFJ PAN, Krakow, Poland *
16 * (IFJ PAN: Henryk Niewodniczanski Inst. Nucl. Physics, Krakow, Poland) *
17 * *
18 * Introduction of regression by: *
19 * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Krakow, Poland *
20 * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Krakow, Poland *
21 * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Krakow, Poland *
22 * *
23 * Introduction of kernel parameter optimisation *
24 * and additional kernel functions by: *
25 * Adrian Bevan <adrian.bevan@cern.ch> - Queen Mary *
26 * University of London, UK *
27 * Tom Stevenson <thomas.james.stevenson@cern.ch> - Queen Mary *
28 * University of London, UK *
29 * *
30 * Copyright (c) 2005: *
31 * CERN, Switzerland *
32 * MPI-K Heidelberg, Germany *
33 * PAN, Krakow, Poland *
34 * *
35 * Redistribution and use in source and binary forms, with or without *
36 * modification, are permitted according to the terms listed in LICENSE *
37 * (http://tmva.sourceforge.net/LICENSE) *
38 **********************************************************************************/
39
40/*! \class TMVA::MethodSVM
41\ingroup TMVA
42SMO Platt's SVM classifier with Keerthi & Shavade improvements
43*/
44
45#include "TMVA/MethodSVM.h"
46
47#include "TMVA/Tools.h"
48#include "TMVA/Timer.h"
49
50#include "TMVA/SVWorkingSet.h"
51
52#include "TMVA/SVEvent.h"
53
55
57#include "TMVA/Configurable.h"
58#include "TMVA/DataSet.h"
59#include "TMVA/DataSetInfo.h"
60#include "TMVA/Event.h"
61#include "TMVA/IMethod.h"
62#include "TMVA/MethodBase.h"
63#include "TMVA/MsgLogger.h"
64#include "TMVA/Types.h"
65#include "TMVA/Interval.h"
67#include "TMVA/Results.h"
69#include "TMVA/VariableInfo.h"
70
71#include "TFile.h"
72#include "TVectorD.h"
73#include "TMath.h"
74
75#include <iostream>
76#include <string>
77
78using std::vector;
79using std::string;
80using std::stringstream;
81
82//const Int_t basketsize__ = 1280000;
84
86
87////////////////////////////////////////////////////////////////////////////////
88/// standard constructor
89
90 TMVA::MethodSVM::MethodSVM( const TString& jobName, const TString& methodTitle, DataSetInfo& theData,
91 const TString& theOption )
92 : MethodBase( jobName, Types::kSVM, methodTitle, theData, theOption)
93 , fCost(0)
94 , fTolerance(0)
95 , fMaxIter(0)
96 , fNSubSets(0)
97 , fBparm(0)
98 , fGamma(0)
99 , fWgSet(0)
100 , fInputData(0)
101 , fSupportVectors(0)
102 , fSVKernelFunction(0)
103 , fMinVars(0)
104 , fMaxVars(0)
105 , fDoubleSigmaSquared(0)
106 , fOrder(0)
107 , fTheta(0)
108 , fKappa(0)
109 , fMult(0)
110 ,fNumVars(0)
111 , fGammas("")
112 , fGammaList("")
113 , fDataSize(0)
114 , fLoss(0)
115{
116 fVarNames.clear();
117 fNumVars = theData.GetVariableInfos().size();
118 for( int i=0; i<fNumVars; i++){
119 fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
120 }
121}
122
123////////////////////////////////////////////////////////////////////////////////
124/// constructor from weight file
125
126TMVA::MethodSVM::MethodSVM( DataSetInfo& theData, const TString& theWeightFile)
127 : MethodBase( Types::kSVM, theData, theWeightFile)
128 , fCost(0)
129 , fTolerance(0)
130 , fMaxIter(0)
131 , fNSubSets(0)
132 , fBparm(0)
133 , fGamma(0)
134 , fWgSet(0)
135 , fInputData(0)
136 , fSupportVectors(0)
137 , fSVKernelFunction(0)
138 , fMinVars(0)
139 , fMaxVars(0)
140 , fDoubleSigmaSquared(0)
141 , fOrder(0)
142 , fTheta(0)
143 , fKappa(0)
144 , fMult(0)
145 , fNumVars(0)
146 , fGammas("")
147 , fGammaList("")
148 , fDataSize(0)
149 , fLoss(0)
150{
151 fVarNames.clear();
152 fNumVars = theData.GetVariableInfos().size();
153 for( int i=0;i<fNumVars; i++){
154 fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
155 }
156}
157
158////////////////////////////////////////////////////////////////////////////////
159/// destructor
160
162{
163 fSupportVectors->clear();
164 for (UInt_t i=0; i<fInputData->size(); i++) {
165 delete fInputData->at(i);
166 }
167 if (fWgSet !=0) { delete fWgSet; fWgSet=0; }
168 if (fSVKernelFunction !=0 ) { delete fSVKernelFunction; fSVKernelFunction = 0; }
169}
170
171////////////////////////////////////////////////////////////////////////////////
172// reset the method, as if it had just been instantiated (forget all training etc.)
173
175{
176 // reset the method, as if it had just been instantiated (forget all training etc.)
177 fSupportVectors->clear();
178 for (UInt_t i=0; i<fInputData->size(); i++){
179 delete fInputData->at(i);
180 fInputData->at(i)=0;
181 }
182 fInputData->clear();
183 if (fWgSet !=0) { fWgSet=0; }
184 if (fSVKernelFunction !=0 ) { fSVKernelFunction = 0; }
185 if (Data()){
186 Data()->DeleteResults(GetMethodName(), Types::kTraining, GetAnalysisType());
187 }
188
189 Log() << kDEBUG << " successfully(?) reset the method " << Endl;
190}
191
192////////////////////////////////////////////////////////////////////////////////
193/// SVM can handle classification with 2 classes and regression with one regression-target
194
196{
197 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
198 if (type == Types::kRegression && numberTargets == 1) return kTRUE;
199 return kFALSE;
200}
201
202////////////////////////////////////////////////////////////////////////////////
203/// default initialisation
204
206{
207 // SVM always uses normalised input variables
208 SetNormalised( kTRUE );
209
210 // Helge: do not book a event vector of given size but rather fill the vector
211 // later with pus_back. Anyway, this is NOT what is time consuming in
212 // SVM and it allows to skip totally events with weights == 0 ;)
213 fInputData = new std::vector<TMVA::SVEvent*>(0);
214 fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
215}
216
217////////////////////////////////////////////////////////////////////////////////
218/// declare options available for this method
219
221{
222 DeclareOptionRef( fTheKernel = "RBF", "Kernel", "Pick which kernel ( RBF or MultiGauss )");
223 // for gaussian kernel parameter(s)
224 DeclareOptionRef( fGamma = 1., "Gamma", "RBF kernel parameter: Gamma (size of the Kernel)");
225 // for polynomial kernel parameter(s)
226 DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
227 DeclareOptionRef( fTheta = 1., "Theta", "Polynomial Kernel parameter: polynomial theta");
228 // for multi-gaussian kernel parameter(s)
229 DeclareOptionRef( fGammas = "", "GammaList", "MultiGauss parameters" );
230
231 // for range and step number for kernel parameter optimisation
232 DeclareOptionRef( fTune = "All", "Tune", "Tune Parameters");
233 // for list of kernels to be used with product or sum kernel
234 DeclareOptionRef( fMultiKernels = "None", "KernelList", "Sum or product of kernels");
235 DeclareOptionRef( fLoss = "hinge", "Loss", "Loss function");
236
237 DeclareOptionRef( fCost, "C", "Cost parameter" );
238 if (DoRegression()) {
239 fCost = 0.002;
240 }else{
241 fCost = 1.;
242 }
243 DeclareOptionRef( fTolerance = 0.01, "Tol", "Tolerance parameter" ); //should be fixed
244 DeclareOptionRef( fMaxIter = 1000, "MaxIter", "Maximum number of training loops" );
245
246}
247
248////////////////////////////////////////////////////////////////////////////////
249/// options that are used ONLY for the READER to ensure backward compatibility
250
252{
254 DeclareOptionRef( fNSubSets = 1, "NSubSets", "Number of training subsets" );
255 DeclareOptionRef( fTheKernel = "Gauss", "Kernel", "Uses kernel function");
256 // for gaussian kernel parameter(s)
257 DeclareOptionRef( fDoubleSigmaSquared = 2., "Sigma", "Kernel parameter: sigma");
258 // for polynomial kernel parameter(s)
259 DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
260 // for sigmoid kernel parameters
261 DeclareOptionRef( fTheta = 1., "Theta", "Sigmoid Kernel parameter: theta");
262 DeclareOptionRef( fKappa = 1., "Kappa", "Sigmoid Kernel parameter: kappa");
263}
264
265////////////////////////////////////////////////////////////////////////////////
266/// option post processing (if necessary)
267
269{
270 if (IgnoreEventsWithNegWeightsInTraining()) {
271 Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
272 << GetMethodTypeName()
273 << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
274 << Endl;
275 }
276}
277
278////////////////////////////////////////////////////////////////////////////////
279/// Train SVM
280
282{
283 fIPyMaxIter = fMaxIter;
284 Data()->SetCurrentType(Types::kTraining);
285
286 Log() << kDEBUG << "Create event vector"<< Endl;
287
288 fDataSize = Data()->GetNEvents();
289 Int_t nSignal = Data()->GetNEvtSigTrain();
290 Int_t nBackground = Data()->GetNEvtBkgdTrain();
291 Double_t CSig;
292 Double_t CBkg;
293
294 // Use number of signal and background from above to weight the cost parameter
295 // so that the training is not biased towards the larger dataset when the signal
296 // and background samples are significantly different sizes.
297 if(nSignal < nBackground){
298 CSig = fCost;
299 CBkg = CSig*((double)nSignal/nBackground);
300 }
301 else{
302 CBkg = fCost;
303 CSig = CBkg*((double)nSignal/nBackground);
304 }
305
306 // Loop over events and assign the correct cost parameter.
307 for (Int_t ievnt=0; ievnt<Data()->GetNEvents(); ievnt++){
308 if (GetEvent(ievnt)->GetWeight() != 0){
309 if(DataInfo().IsSignal(GetEvent(ievnt))){
310 fInputData->push_back(new SVEvent(GetEvent(ievnt), CSig, DataInfo().IsSignal\
311 (GetEvent(ievnt))));
312 }
313 else{
314 fInputData->push_back(new SVEvent(GetEvent(ievnt), CBkg, DataInfo().IsSignal\
315 (GetEvent(ievnt))));
316 }
317 }
318 }
319
320 // Set the correct kernel function.
321 // Here we only use valid Mercer kernels. In the literature some people have reported reasonable
322 // results using Sigmoid kernel function however that is not a valid Mercer kernel and is not used here.
323 if( fTheKernel == "RBF"){
324 fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kRBF, fGamma);
325 }
326 else if( fTheKernel == "MultiGauss" ){
327 if(fGammas!=""){
328 SetMGamma(fGammas);
329 fGammaList=fGammas;
330 }
331 else{
332 if(fmGamma.size()!=0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
333 else{
334 for(Int_t ngammas=0; ngammas<fNumVars; ++ngammas){
335 fmGamma.push_back(1.0);
336 }
337 GetMGamma(fmGamma);
338 }
339 }
340 fSVKernelFunction = new SVKernelFunction(fmGamma);
341 }
342 else if( fTheKernel == "Polynomial" ){
343 fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kPolynomial, fOrder,fTheta);
344 }
345 else if( fTheKernel == "Prod" ){
346 if(fGammas!=""){
347 SetMGamma(fGammas);
348 fGammaList=fGammas;
349 }
350 else{
351 if(fmGamma.size()!=0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
352 }
353 fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kProd, MakeKernelList(fMultiKernels,fTheKernel), fmGamma, fGamma, fOrder, fTheta );
354 }
355 else if( fTheKernel == "Sum" ){
356 if(fGammas!=""){
357 SetMGamma(fGammas);
358 fGammaList=fGammas;
359 }
360 else{
361 if(fmGamma.size()!=0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
362 }
363 fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kSum, MakeKernelList(fMultiKernels,fTheKernel), fmGamma, fGamma, fOrder, fTheta );
364 }
365 else {
366 Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
367 exit(1);
368 }
369
370 Log()<< kINFO << "Building SVM Working Set...with "<<fInputData->size()<<" event instances"<< Endl;
371 Timer bldwstime( GetName());
372 fWgSet = new SVWorkingSet( fInputData, fSVKernelFunction,fTolerance, DoRegression() );
373 Log() << kINFO <<"Elapsed time for Working Set build: "<< bldwstime.GetElapsedTime()<<Endl;
374
375 // timing
376 Timer timer( GetName() );
377 Log() << kINFO << "Sorry, no computing time forecast available for SVM, please wait ..." << Endl;
378
379 if (fInteractive) fWgSet->SetIPythonInteractive(&fExitFromTraining, &fIPyCurrentIter);
380
381 fWgSet->Train(fMaxIter);
382
383 Log() << kINFO << "Elapsed time: " << timer.GetElapsedTime()
384 << " " << Endl;
385
386 fBparm = fWgSet->GetBpar();
387 fSupportVectors = fWgSet->GetSupportVectors();
388 delete fWgSet;
389 fWgSet=0;
390
391 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
392 ExitFromTraining();
393}
394
395////////////////////////////////////////////////////////////////////////////////
396/// write configuration to xml file
397
398void TMVA::MethodSVM::AddWeightsXMLTo( void* parent ) const
399{
400 void* wght = gTools().AddChild(parent, "Weights");
401 gTools().AddAttr(wght,"fBparm",fBparm);
402 gTools().AddAttr(wght,"fGamma",fGamma);
403 gTools().AddAttr(wght,"fGammaList",fGammaList);
404 gTools().AddAttr(wght,"fTheta",fTheta);
405 gTools().AddAttr(wght,"fOrder",fOrder);
406 gTools().AddAttr(wght,"NSupVec",fSupportVectors->size());
407
408 for (std::vector<TMVA::SVEvent*>::iterator veciter=fSupportVectors->begin();
409 veciter!=fSupportVectors->end() ; ++veciter ) {
410 TVectorD temp(GetNvar()+4);
411 temp[0] = (*veciter)->GetNs();
412 temp[1] = (*veciter)->GetTypeFlag();
413 temp[2] = (*veciter)->GetAlpha();
414 temp[3] = (*veciter)->GetAlpha_p();
415 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
416 temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
417 gTools().WriteTVectorDToXML(wght,"SupportVector",&temp);
418 }
419 // write max/min data values
420 void* maxnode = gTools().AddChild(wght, "Maxima");
421 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
422 gTools().AddAttr(maxnode, "Var"+gTools().StringFromInt(ivar), GetXmax(ivar));
423 void* minnode = gTools().AddChild(wght, "Minima");
424 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
425 gTools().AddAttr(minnode, "Var"+gTools().StringFromInt(ivar), GetXmin(ivar));
426}
427
428////////////////////////////////////////////////////////////////////////////////
429
431{
432 gTools().ReadAttr( wghtnode, "fBparm",fBparm );
433 gTools().ReadAttr( wghtnode, "fGamma",fGamma);
434 gTools().ReadAttr( wghtnode, "fGammaList",fGammaList);
435 gTools().ReadAttr( wghtnode, "fOrder",fOrder);
436 gTools().ReadAttr( wghtnode, "fTheta",fTheta);
437 UInt_t fNsupv=0;
438 gTools().ReadAttr( wghtnode, "NSupVec",fNsupv );
439
440 Float_t alpha=0.;
441 Float_t alpha_p = 0.;
442
443 Int_t typeFlag=-1;
444 // UInt_t ns = 0;
445 std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());
446
447 if (fMaxVars!=0) delete fMaxVars;
448 fMaxVars = new TVectorD( GetNvar() );
449 if (fMinVars!=0) delete fMinVars;
450 fMinVars = new TVectorD( GetNvar() );
451 if (fSupportVectors!=0) {
452 for (vector< SVEvent* >::iterator it = fSupportVectors->begin(); it!=fSupportVectors->end(); ++it)
453 delete *it;
454 delete fSupportVectors;
455 }
456 fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
457 void* supportvectornode = gTools().GetChild(wghtnode);
458 for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
459 TVectorD temp(GetNvar()+4);
460 gTools().ReadTVectorDFromXML(supportvectornode,"SupportVector",&temp);
461 // ns=(UInt_t)temp[0];
462 typeFlag=(int)temp[1];
463 alpha=temp[2];
464 alpha_p=temp[3];
465 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) (*svector)[ivar]=temp[ivar+4];
466
467 fSupportVectors->push_back(new SVEvent(svector,alpha,alpha_p,typeFlag));
468 supportvectornode = gTools().GetNextChild(supportvectornode);
469 }
470
471 void* maxminnode = supportvectornode;
472 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
473 gTools().ReadAttr( maxminnode,"Var"+gTools().StringFromInt(ivar),(*fMaxVars)[ivar]);
474 maxminnode = gTools().GetNextChild(maxminnode);
475 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
476 gTools().ReadAttr( maxminnode,"Var"+gTools().StringFromInt(ivar),(*fMinVars)[ivar]);
477 if (fSVKernelFunction!=0) delete fSVKernelFunction;
478 if( fTheKernel == "RBF" ){
479 fSVKernelFunction = new SVKernelFunction(SVKernelFunction::kRBF, fGamma);
480 }
481 else if( fTheKernel == "MultiGauss" ){
482 SetMGamma(fGammaList);
483 fSVKernelFunction = new SVKernelFunction(fmGamma);
484 }
485 else if( fTheKernel == "Polynomial" ){
486 fSVKernelFunction = new SVKernelFunction(SVKernelFunction::kPolynomial, fOrder, fTheta);
487 }
488 else if( fTheKernel == "Prod" ){
489 SetMGamma(fGammaList);
490 fSVKernelFunction = new SVKernelFunction(SVKernelFunction::kSum, MakeKernelList(fMultiKernels,fTheKernel), fmGamma, fGamma, fOrder, fTheta);
491 }
492 else if( fTheKernel == "Sum" ){
493 SetMGamma(fGammaList);
494 fSVKernelFunction = new SVKernelFunction(SVKernelFunction::kSum, MakeKernelList(fMultiKernels,fTheKernel), fmGamma, fGamma, fOrder, fTheta);
495 }
496 else {
497 Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
498 exit(1);
499 }
500 delete svector;
501}
502
503////////////////////////////////////////////////////////////////////////////////
504///TODO write IT
505/// write training sample (TTree) to file
506
508{
509}
510
511////////////////////////////////////////////////////////////////////////////////
512
514{
515 if (fSupportVectors !=0) { delete fSupportVectors; fSupportVectors = 0;}
516 fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
517
518 // read configuration from input stream
519 istr >> fBparm;
520
521 UInt_t fNsupv;
522 // coverity[tainted_data_argument]
523 istr >> fNsupv;
524 fSupportVectors->reserve(fNsupv);
525
526 Float_t typeTalpha=0.;
527 Float_t alpha=0.;
528 Int_t typeFlag=-1;
529 UInt_t ns = 0;
530 std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());
531
532 fMaxVars = new TVectorD( GetNvar() );
533 fMinVars = new TVectorD( GetNvar() );
534
535 for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
536 istr>>ns;
537 istr>>typeTalpha;
538 typeFlag = typeTalpha<0?-1:1;
539 alpha = typeTalpha<0?-typeTalpha:typeTalpha;
540 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> svector->at(ivar);
541
542 fSupportVectors->push_back(new SVEvent(svector,alpha,typeFlag,ns));
543 }
544
545 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMaxVars)[ivar];
546
547 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMinVars)[ivar];
548
549 delete fSVKernelFunction;
550 if (fTheKernel == "Gauss" ) {
551 fSVKernelFunction = new SVKernelFunction(1/fDoubleSigmaSquared);
552 }
553 else {
555 if(fTheKernel == "Linear") k = SVKernelFunction::kLinear;
556 else if (fTheKernel == "Polynomial") k = SVKernelFunction::kPolynomial;
557 else if (fTheKernel == "Sigmoid" ) k = SVKernelFunction::kSigmoidal;
558 else {
559 Log() << kFATAL <<"Unknown kernel function found in weight file!" << Endl;
560 }
561 fSVKernelFunction = new SVKernelFunction();
562 fSVKernelFunction->setCompatibilityParams(k, fOrder, fTheta, fKappa);
563 }
564 delete svector;
565}
566
567////////////////////////////////////////////////////////////////////////////////
568/// TODO write IT
569
571{
572}
573
574////////////////////////////////////////////////////////////////////////////////
575/// returns MVA value for given event
576
578{
579 Double_t myMVA = 0;
580
581 // TODO: avoid creation of a new SVEvent every time (Joerg)
582 SVEvent* ev = new SVEvent( GetEvent(), 0. ); // check for specificators
583
584 for (UInt_t ievt = 0; ievt < fSupportVectors->size() ; ievt++) {
585 myMVA += ( fSupportVectors->at(ievt)->GetAlpha()
586 * fSupportVectors->at(ievt)->GetTypeFlag()
587 * fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
588 }
589
590 delete ev;
591
592 myMVA -= fBparm;
593
594 // cannot determine error
595 NoErrorCalc(err, errUpper);
596
597 // 08/12/09: changed sign here to make results agree with convention signal=1
598 return 1.0/(1.0 + TMath::Exp(myMVA));
599}
600////////////////////////////////////////////////////////////////////////////////
601
602const std::vector<Float_t>& TMVA::MethodSVM::GetRegressionValues()
603{
604 if( fRegressionReturnVal == NULL )
605 fRegressionReturnVal = new std::vector<Float_t>();
606 fRegressionReturnVal->clear();
607
608 Double_t myMVA = 0;
609
610 const Event *baseev = GetEvent();
611 SVEvent* ev = new SVEvent( baseev,0. ); //check for specificators
612
613 for (UInt_t ievt = 0; ievt < fSupportVectors->size() ; ievt++) {
614 myMVA += ( fSupportVectors->at(ievt)->GetDeltaAlpha()
615 *fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
616 }
617 myMVA += fBparm;
618 Event * evT = new Event(*baseev);
619 evT->SetTarget(0,myMVA);
620
621 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
622
623 fRegressionReturnVal->push_back(evT2->GetTarget(0));
624
625 delete evT;
626
627 delete ev;
628
629 return *fRegressionReturnVal;
630}
631
632////////////////////////////////////////////////////////////////////////////////
633/// write specific classifier response
634
635void TMVA::MethodSVM::MakeClassSpecific( std::ostream& fout, const TString& className ) const
636{
637 const int fNsupv = fSupportVectors->size();
638 fout << " // not implemented for class: \"" << className << "\"" << std::endl;
639 fout << " float fBparameter;" << std::endl;
640 fout << " int fNOfSuppVec;" << std::endl;
641 fout << " static float fAllSuppVectors[][" << fNsupv << "];" << std::endl;
642 fout << " static float fAlphaTypeCoef[" << fNsupv << "];" << std::endl;
643 fout << std::endl;
644 fout << " // Kernel parameter(s) " << std::endl;
645 fout << " float fGamma;" << std::endl;
646 fout << "};" << std::endl;
647 fout << "" << std::endl;
648
649 //Initialize function definition
650 fout << "inline void " << className << "::Initialize() " << std::endl;
651 fout << "{" << std::endl;
652 fout << " fBparameter = " << fBparm << ";" << std::endl;
653 fout << " fNOfSuppVec = " << fNsupv << ";" << std::endl;
654 fout << " fGamma = " << fGamma << ";" <<std::endl;
655 fout << "}" << std::endl;
656 fout << std::endl;
657
658 // GetMvaValue__ function definition
659 fout << "inline double " << className << "::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
660 fout << "{" << std::endl;
661 fout << " double mvaval = 0; " << std::endl;
662 fout << " double temp = 0; " << std::endl;
663 fout << std::endl;
664 fout << " for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
665 fout << " temp = 0;" << std::endl;
666 fout << " for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
667
668 fout << " temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar]) " << std::endl;
669 fout << " * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
670 fout << " }" << std::endl;
671 fout << " mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;
672
673 fout << " }" << std::endl;
674 fout << " mvaval -= fBparameter;" << std::endl;
675 fout << " return 1./(1. + exp(mvaval));" << std::endl;
676 fout << "}" << std::endl;
677 fout << "// Clean up" << std::endl;
678 fout << "inline void " << className << "::Clear() " << std::endl;
679 fout << "{" << std::endl;
680 fout << " // nothing to clear " << std::endl;
681 fout << "}" << std::endl;
682 fout << "" << std::endl;
683
684 // define support vectors
685 fout << "float " << className << "::fAlphaTypeCoef[] =" << std::endl;
686 fout << "{ ";
687 for (Int_t isv = 0; isv < fNsupv; isv++) {
688 fout << fSupportVectors->at(isv)->GetDeltaAlpha() * fSupportVectors->at(isv)->GetTypeFlag();
689 if (isv < fNsupv-1) fout << ", ";
690 }
691 fout << " };" << std::endl << std::endl;
692
693 fout << "float " << className << "::fAllSuppVectors[][" << fNsupv << "] =" << std::endl;
694 fout << "{";
695 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
696 fout << std::endl;
697 fout << " { ";
698 for (Int_t isv = 0; isv < fNsupv; isv++){
699 fout << fSupportVectors->at(isv)->GetDataVector()->at(ivar);
700 if (isv < fNsupv-1) fout << ", ";
701 }
702 fout << " }";
703 if (ivar < GetNvar()-1) fout << ", " << std::endl;
704 else fout << std::endl;
705 }
706 fout << "};" << std::endl<< std::endl;
707}
708
709////////////////////////////////////////////////////////////////////////////////
710/// get help message text
711///
712/// typical length of text line:
713/// "|--------------------------------------------------------------|"
714
716{
717 Log() << Endl;
718 Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
719 Log() << Endl;
720 Log() << "The Support Vector Machine (SVM) builds a hyperplane separating" << Endl;
721 Log() << "signal and background events (vectors) using the minimal subset of " << Endl;
722 Log() << "all vectors used for training (support vectors). The extension to" << Endl;
723 Log() << "the non-linear case is performed by mapping input vectors into a " << Endl;
724 Log() << "higher-dimensional feature space in which linear separation is " << Endl;
725 Log() << "possible. The use of the kernel functions thereby eliminates the " << Endl;
726 Log() << "explicit transformation to the feature space. The implemented SVM " << Endl;
727 Log() << "algorithm performs the classification tasks using linear, polynomial, " << Endl;
728 Log() << "Gaussian and sigmoidal kernel functions. The Gaussian kernel allows " << Endl;
729 Log() << "to apply any discriminant shape in the input space." << Endl;
730 Log() << Endl;
731 Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
732 Log() << Endl;
733 Log() << "SVM is a general purpose non-linear classification method, which " << Endl;
734 Log() << "does not require data preprocessing like decorrelation or Principal " << Endl;
735 Log() << "Component Analysis. It generalises quite well and can handle analyses " << Endl;
736 Log() << "with large numbers of input variables." << Endl;
737 Log() << Endl;
738 Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
739 Log() << Endl;
740 Log() << "Optimal performance requires primarily a proper choice of the kernel " << Endl;
741 Log() << "parameters (the width \"Sigma\" in case of Gaussian kernel) and the" << Endl;
742 Log() << "cost parameter \"C\". The user must optimise them empirically by running" << Endl;
743 Log() << "SVM several times with different parameter sets. The time needed for " << Endl;
744 Log() << "each evaluation scales like the square of the number of training " << Endl;
745 Log() << "events so that a coarse preliminary tuning should be performed on " << Endl;
746 Log() << "reduced data sets." << Endl;
747}
748
749////////////////////////////////////////////////////////////////////////////////
750/// Optimize Tuning Parameters
751/// This is used to optimise the kernel function parameters and cost. All kernel parameters
752/// are optimised by default with default ranges, however the parameters to be optimised can
753/// be set when booking the method with the option Tune.
754///
755/// Example:
756///
757/// "Tune=Gamma[0.01;1.0;100]" would only tune the RBF Gamma between 0.01 and 1.0
758/// with 100 steps.
759
760std::map<TString,Double_t> TMVA::MethodSVM::OptimizeTuningParameters(TString fomType, TString fitType)
761{
762 // Call the Optimizer with the set of kernel parameters and ranges that are meant to be tuned.
763 std::map< TString,std::vector<Double_t> > optVars;
764 // Get parameters and options specified in booking of method.
765 if(fTune != "All"){
766 optVars= GetTuningOptions();
767 }
768 std::map< TString,std::vector<Double_t> >::iterator iter;
769 // Fill all the tuning parameters that should be optimized into a map
770 std::map<TString,TMVA::Interval*> tuneParameters;
771 std::map<TString,Double_t> tunedParameters;
772 // Note: the 3rd parameter in the interval is the "number of bins", NOT the stepsize!!
773 // The actual values are always read from the middle of the bins.
774 Log() << kINFO << "Using the " << fTheKernel << " kernel." << Endl;
775 // Setup map of parameters based on the specified options or defaults.
776 if( fTheKernel == "RBF" ){
777 if(fTune == "All"){
778 tuneParameters.insert(std::pair<TString,Interval*>("Gamma",new Interval(0.01,1.,100)));
779 tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
780 }
781 else{
782 for(iter=optVars.begin(); iter!=optVars.end(); ++iter){
783 if( iter->first == "Gamma" || iter->first == "C"){
784 tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
785 }
786 else{
787 Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
788 exit(1);
789 }
790 }
791 }
792 }
793 else if( fTheKernel == "Polynomial" ){
794 if (fTune == "All"){
795 tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
796 tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.01,1.,100)));
797 tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
798 }
799 else{
800 for(iter=optVars.begin(); iter!=optVars.end(); ++iter){
801 if( iter->first == "Theta" || iter->first == "C"){
802 tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
803 }
804 else if( iter->first == "Order"){
805 tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
806 }
807 else{
808 Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
809 exit(1);
810 }
811 }
812 }
813 }
814 else if( fTheKernel == "MultiGauss" ){
815 if (fTune == "All"){
816 for(int i=0; i<fNumVars; i++){
817 stringstream s;
818 s << fVarNames.at(i);
819 string str = "Gamma_" + s.str();
820 tuneParameters.insert(std::pair<TString,Interval*>(str,new Interval(0.01,1.,100)));
821 }
822 tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
823 } else {
824 for(iter=optVars.begin(); iter!=optVars.end(); ++iter){
825 if( iter->first == "GammaList"){
826 for(int j=0; j<fNumVars; j++){
827 stringstream s;
828 s << fVarNames.at(j);
829 string str = "Gamma_" + s.str();
830 tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
831 }
832 }
833 else if( iter->first == "C"){
834 tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
835 }
836 else{
837 Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
838 exit(1);
839 }
840 }
841 }
842 }
843 else if( fTheKernel == "Prod" ){
844 std::stringstream tempstring(fMultiKernels);
845 std::string value;
846 while (std::getline(tempstring,value,'*')){
847 if(value == "RBF"){
848 tuneParameters.insert(std::pair<TString,Interval*>("Gamma",new Interval(0.01,1.,100)));
849 }
850 else if(value == "MultiGauss"){
851 for(int i=0; i<fNumVars; i++){
852 stringstream s;
853 s << fVarNames.at(i);
854 string str = "Gamma_" + s.str();
855 tuneParameters.insert(std::pair<TString,Interval*>(str,new Interval(0.01,1.,100)));
856 }
857 }
858 else if(value == "Polynomial"){
859 tuneParameters.insert(std::pair<TString,Interval*>("Order",new Interval(1,10,10)));
860 tuneParameters.insert(std::pair<TString,Interval*>("Theta",new Interval(0.0,1.0,101)));
861 }
862 else {
863 Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
864 exit(1);
865 }
866 }
867 tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
868 }
869 else if( fTheKernel == "Sum" ){
870 std::stringstream tempstring(fMultiKernels);
871 std::string value;
872 while (std::getline(tempstring,value,'+')){
873 if(value == "RBF"){
874 tuneParameters.insert(std::pair<TString,Interval*>("Gamma",new Interval(0.01,1.,100)));
875 }
876 else if(value == "MultiGauss"){
877 for(int i=0; i<fNumVars; i++){
878 stringstream s;
879 s << fVarNames.at(i);
880 string str = "Gamma_" + s.str();
881 tuneParameters.insert(std::pair<TString,Interval*>(str,new Interval(0.01,1.,100)));
882 }
883 }
884 else if(value == "Polynomial"){
885 tuneParameters.insert(std::pair<TString,Interval*>("Order",new Interval(1,10,10)));
886 tuneParameters.insert(std::pair<TString,Interval*>("Theta",new Interval(0.0,1.0,101)));
887 }
888 else {
889 Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
890 exit(1);
891 }
892 }
893 tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
894 }
895 else {
896 Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
897 exit(1);
898 }
899 Log() << kINFO << " the following SVM parameters will be tuned on the respective *grid*\n" << Endl;
900 std::map<TString,TMVA::Interval*>::iterator it;
901 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
902 Log() << kWARNING << it->first <<Endl;
903 std::ostringstream oss;
904 (it->second)->Print(oss);
905 Log()<<oss.str();
906 Log()<<Endl;
907 }
908 OptimizeConfigParameters optimize(this, tuneParameters, fomType, fitType);
909 tunedParameters=optimize.optimize();
910
911 return tunedParameters;
912
913}
914
915////////////////////////////////////////////////////////////////////////////////
916/// Set the tuning parameters according to the argument
917void TMVA::MethodSVM::SetTuneParameters(std::map<TString,Double_t> tuneParameters)
918{
919 std::map<TString,Double_t>::iterator it;
920 if( fTheKernel == "RBF" ){
921 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
922 Log() << kWARNING << it->first << " = " << it->second << Endl;
923 if (it->first == "Gamma"){
924 SetGamma (it->second);
925 }
926 else if(it->first == "C"){
927 SetCost (it->second);
928 }
929 else {
930 Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
931 }
932 }
933 }
934 else if( fTheKernel == "MultiGauss" ){
935 fmGamma.clear();
936 for(int i=0; i<fNumVars; i++){
937 stringstream s;
938 s << fVarNames.at(i);
939 string str = "Gamma_" + s.str();
940 Log() << kWARNING << tuneParameters.find(str)->first << " = " << tuneParameters.find(str)->second << Endl;
941 fmGamma.push_back(tuneParameters.find(str)->second);
942 }
943 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
944 if (it->first == "C"){
945 Log() << kWARNING << it->first << " = " << it->second << Endl;
946 SetCost(it->second);
947 break;
948 }
949 }
950 }
951 else if( fTheKernel == "Polynomial" ){
952 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
953 Log() << kWARNING << it->first << " = " << it->second << Endl;
954 if (it->first == "Order"){
955 SetOrder(it->second);
956 }
957 else if (it->first == "Theta"){
958 SetTheta(it->second);
959 }
960 else if(it->first == "C"){ SetCost (it->second);
961 }
962 else if(it->first == "Mult"){
963 SetMult(it->second);
964 }
965 else{
966 Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
967 }
968 }
969 }
970 else if( fTheKernel == "Prod" || fTheKernel == "Sum"){
971 fmGamma.clear();
972 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
973 bool foundParam = false;
974 Log() << kWARNING << it->first << " = " << it->second << Endl;
975 for(int i=0; i<fNumVars; i++){
976 stringstream s;
977 s << fVarNames.at(i);
978 string str = "Gamma_" + s.str();
979 if(it->first == str){
980 fmGamma.push_back(it->second);
981 foundParam = true;
982 }
983 }
984 if (it->first == "Gamma"){
985 SetGamma (it->second);
986 foundParam = true;
987 }
988 else if (it->first == "Order"){
989 SetOrder (it->second);
990 foundParam = true;
991 }
992 else if (it->first == "Theta"){
993 SetTheta (it->second);
994 foundParam = true;
995 }
996 else if (it->first == "C"){ SetCost (it->second);
997 SetCost (it->second);
998 foundParam = true;
999 }
1000 else{
1001 if(!foundParam){
1002 Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
1003 }
1004 }
1005 }
1006 }
1007 else {
1008 Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
1009 exit(1);
1010 }
1011}
1012
1013////////////////////////////////////////////////////////////////////////////////
1014/// Takes as input a string of values for multigaussian gammas and splits it, filling the
1015/// gamma vector required by the SVKernelFunction. Example: "GammaList=0.1,0.2,0.3" would
1016/// make a vector with Gammas of 0.1,0.2 & 0.3 corresponding to input variables 1,2 & 3
1017/// respectively.
1018void TMVA::MethodSVM::SetMGamma(std::string & mg){
1019 std::stringstream tempstring(mg);
1020 Float_t value;
1021 while (tempstring >> value){
1022 fmGamma.push_back(value);
1023
1024 if (tempstring.peek() == ','){
1025 tempstring.ignore();
1026 }
1027 }
1028}
1029
1030////////////////////////////////////////////////////////////////////////////////
1031/// Produces GammaList string for multigaussian kernel to be written to xml file
1032void TMVA::MethodSVM::GetMGamma(const std::vector<float> & gammas){
1033 std::ostringstream tempstring;
1034 for(UInt_t i = 0; i<gammas.size(); ++i){
1035 tempstring << gammas.at(i);
1036 if(i!=(gammas.size()-1)){
1037 tempstring << ",";
1038 }
1039 }
1040 fGammaList= tempstring.str();
1041}
1042
1043////////////////////////////////////////////////////////////////////////////////
1044/// MakeKernelList
1045/// Function providing string manipulation for product or sum of kernels functions
1046/// to take list of kernels specified in the booking of the method and provide a vector
1047/// of SV kernels to iterate over in SVKernelFunction.
1048///
1049/// Example:
1050///
1051/// "KernelList=RBF*Polynomial" would use a product of the RBF and Polynomial
1052/// kernels.
1053
1054std::vector<TMVA::SVKernelFunction::EKernelType> TMVA::MethodSVM::MakeKernelList(std::string multiKernels, TString kernel)
1055{
1056 std::vector<TMVA::SVKernelFunction::EKernelType> kernelsList;
1057 std::stringstream tempstring(multiKernels);
1058 std::string value;
1059 if(kernel=="Prod"){
1060 while (std::getline(tempstring,value,'*')){
1061 if(value == "RBF"){ kernelsList.push_back(SVKernelFunction::kRBF);}
1062 else if(value == "MultiGauss"){
1063 kernelsList.push_back(SVKernelFunction::kMultiGauss);
1064 if(fGammas!=""){
1065 SetMGamma(fGammas);
1066 }
1067 }
1068 else if(value == "Polynomial"){ kernelsList.push_back(SVKernelFunction::kPolynomial);}
1069 else {
1070 Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
1071 exit(1);
1072 }
1073 }
1074 }
1075 else if(kernel=="Sum"){
1076 while (std::getline(tempstring,value,'+')){
1077 if(value == "RBF"){ kernelsList.push_back(SVKernelFunction::kRBF);}
1078 else if(value == "MultiGauss"){
1079 kernelsList.push_back(SVKernelFunction::kMultiGauss);
1080 if(fGammas!=""){
1081 SetMGamma(fGammas);
1082 }
1083 }
1084 else if(value == "Polynomial"){ kernelsList.push_back(SVKernelFunction::kPolynomial);}
1085 else {
1086 Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
1087 exit(1);
1088 }
1089 }
1090 }
1091 else {
1092 Log() << kWARNING << "Unable to split MultiKernels. Delimiters */+ required." << Endl;
1093 exit(1);
1094 }
1095 return kernelsList;
1096}
1097
1098////////////////////////////////////////////////////////////////////////////////
1099/// GetTuningOptions
1100/// Function to allow for ranges and number of steps (for scan) when optimising kernel
1101/// function parameters. Specified when booking the method after the parameter to be
1102/// optimised between square brackets with each value separated by ;, the first value
1103/// is the lower limit, the second the upper limit and the third is the number of steps.
1104/// Example: "Tune=Gamma[0.01;1.0;100]" would only tune the RBF Gamma between 0.01 and
1105/// 100 steps.
1106std::map< TString,std::vector<Double_t> > TMVA::MethodSVM::GetTuningOptions()
1107{
1108 std::map< TString,std::vector<Double_t> > optVars;
1109 std::stringstream tempstring(fTune);
1110 std::string value;
1111 while (std::getline(tempstring,value,',')){
1112 unsigned first = value.find('[')+1;
1113 unsigned last = value.find_last_of(']');
1114 std::string optParam = value.substr(0,first-1);
1115 std::stringstream strNew (value.substr(first,last-first));
1116 Double_t optInterval;
1117 std::vector<Double_t> tempVec;
1118 UInt_t i = 0;
1119 while (strNew >> optInterval){
1120 tempVec.push_back(optInterval);
1121 if (strNew.peek() == ';'){
1122 strNew.ignore();
1123 }
1124 ++i;
1125 }
1126 if(i != 3 && i == tempVec.size()){
1127 if(optParam == "C" || optParam == "Gamma" || optParam == "GammaList" || optParam == "Theta"){
1128 switch(i){
1129 case 0:
1130 tempVec.push_back(0.01);
1131 case 1:
1132 tempVec.push_back(1.);
1133 case 2:
1134 tempVec.push_back(100);
1135 }
1136 }
1137 else if(optParam == "Order"){
1138 switch(i){
1139 case 0:
1140 tempVec.push_back(1);
1141 case 1:
1142 tempVec.push_back(10);
1143 case 2:
1144 tempVec.push_back(10);
1145 }
1146 }
1147 else{
1148 Log() << kWARNING << optParam << " is not a recognised tuneable parameter." << Endl;
1149 exit(1);
1150 }
1151 }
1152 optVars.insert(std::pair<TString,std::vector<Double_t> >(optParam,tempVec));
1153 }
1154 return optVars;
1155}
1156
1157////////////////////////////////////////////////////////////////////////////////
1158/// getLoss
1159/// Calculates loss for testing dataset. The loss function can be specified when
1160/// booking the method, otherwise defaults to hinge loss. Currently not used however
1161/// is accesible if required.
1162
1164 Double_t loss = 0.0;
1165 Double_t sumW = 0.0;
1166 Double_t temp = 0.0;
1167 Data()->SetCurrentType(Types::kTesting);
1168 ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*> ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
1169 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1170 const Event* ev = GetEvent(ievt);
1171 Float_t v = (*mvaRes)[ievt][0];
1172 Float_t w = ev->GetWeight();
1173 if(DataInfo().IsSignal(ev)){
1174 if(lossFunction == "hinge"){
1175 temp += w*(1-v);
1176 }
1177 else if(lossFunction == "exp"){
1178 temp += w*TMath::Exp(-v);
1179 }
1180 else if(lossFunction == "binomial"){
1181 temp += w*TMath::Log(1+TMath::Exp(-2*v));
1182 }
1183 else{
1184 Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
1185 exit(1);
1186 }
1187 }
1188 else{
1189 if(lossFunction == "hinge"){
1190 temp += w*v;
1191 }
1192 else if(lossFunction == "exp"){
1193 temp += w*TMath::Exp(-(1-v));
1194 }
1195 else if(lossFunction == "binomial"){
1196 temp += w*TMath::Log(1+TMath::Exp(-2*(1-v)));
1197 }
1198 else{
1199 Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
1200 exit(1);
1201 }
1202 }
1203 sumW += w;
1204 }
1205 loss = temp/sumW;
1206
1207 return loss;
1208}
#define REGISTER_METHOD(CLASS)
for example
double
Definition: Converters.cxx:939
int Int_t
Definition: RtypesCore.h:45
unsigned int UInt_t
Definition: RtypesCore.h:46
const Bool_t kFALSE
Definition: RtypesCore.h:101
bool Bool_t
Definition: RtypesCore.h:63
double Double_t
Definition: RtypesCore.h:59
long long Long64_t
Definition: RtypesCore.h:80
float Float_t
Definition: RtypesCore.h:57
const Bool_t kTRUE
Definition: RtypesCore.h:100
#define ClassImp(name)
Definition: Rtypes.h:364
int type
Definition: TGX11.cxx:121
TVectorT< Double_t > TVectorD
Definition: TVectorDfwd.h:22
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
Class that contains all the data information.
Definition: DataSetInfo.h:62
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:103
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:359
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:381
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
The TMVA::Interval Class.
Definition: Interval.h:61
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:596
SMO Platt's SVM classifier with Keerthi & Shavade improvements.
Definition: MethodSVM.h:61
Double_t getLoss(TString lossFunction)
getLoss Calculates loss for testing dataset.
Definition: MethodSVM.cxx:1163
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
Set the tuning parameters according to the argument.
Definition: MethodSVM.cxx:917
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodSVM.cxx:577
void DeclareOptions()
declare options available for this method
Definition: MethodSVM.cxx:220
std::vector< TString > fVarNames
Definition: MethodSVM.h:156
void WriteWeightsToStream(TFile &fout) const
TODO write IT write training sample (TTree) to file.
Definition: MethodSVM.cxx:507
void SetMGamma(std::string &mg)
Takes as input a string of values for multigaussian gammas and splits it, filling the gamma vector re...
Definition: MethodSVM.cxx:1018
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
SVM can handle classification with 2 classes and regression with one regression-target.
Definition: MethodSVM.cxx:195
void ReadWeightsFromStream(std::istream &istr)
Definition: MethodSVM.cxx:513
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="Minuit")
Optimize Tuning Parameters This is used to optimise the kernel function parameters and cost.
Definition: MethodSVM.cxx:760
void GetMGamma(const std::vector< float > &gammas)
Produces GammaList string for multigaussian kernel to be written to xml file.
Definition: MethodSVM.cxx:1032
void AddWeightsXMLTo(void *parent) const
write configuration to xml file
Definition: MethodSVM.cxx:398
Float_t fNumVars
Definition: MethodSVM.h:155
void Reset(void)
Definition: MethodSVM.cxx:174
std::map< TString, std::vector< Double_t > > GetTuningOptions()
GetTuningOptions Function to allow for ranges and number of steps (for scan) when optimising kernel f...
Definition: MethodSVM.cxx:1106
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodSVM.cxx:430
void ProcessOptions()
option post processing (if necessary)
Definition: MethodSVM.cxx:268
void Train(void)
Train SVM.
Definition: MethodSVM.cxx:281
void Init(void)
default initialisation
Definition: MethodSVM.cxx:205
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Definition: MethodSVM.cxx:635
virtual ~MethodSVM(void)
destructor
Definition: MethodSVM.cxx:161
const std::vector< Float_t > & GetRegressionValues()
Definition: MethodSVM.cxx:602
MethodSVM(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Definition: MethodSVM.cxx:90
void GetHelpMessage() const
get help message text
Definition: MethodSVM.cxx:715
std::vector< TMVA::SVKernelFunction::EKernelType > MakeKernelList(std::string multiKernels, TString kernel)
MakeKernelList Function providing string manipulation for product or sum of kernels functions to take...
Definition: MethodSVM.cxx:1054
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
Definition: MethodSVM.cxx:251
std::map< TString, Double_t > optimize()
Class that is the base-class for a vector of result.
Event class for Support Vector Machine.
Definition: SVEvent.h:40
Kernel for Support Vector Machine.
Working class for Support Vector Machine.
Definition: SVWorkingSet.h:42
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:146
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1162
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1124
void ReadTVectorDFromXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1267
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:828
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1150
void WriteTVectorDToXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1259
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:329
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:347
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1223
Singleton class for Global types used by TMVA.
Definition: Types.h:71
EAnalysisType
Definition: Types.h:126
@ kClassification
Definition: Types.h:127
@ kRegression
Definition: Types.h:128
@ kTraining
Definition: Types.h:143
@ kTesting
Definition: Types.h:144
@ kDEBUG
Definition: Types.h:56
@ kINFO
Definition: Types.h:58
@ kWARNING
Definition: Types.h:59
@ kFATAL
Definition: Types.h:61
Basic string class.
Definition: TString.h:136
void Print(std::ostream &os, const OptionType &opt)
static constexpr double s
static constexpr double ns
static constexpr double mg
void GetMethodName(TString &name, TKey *mkey)
Definition: tmvaglob.cxx:342
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:148
Double_t Exp(Double_t x)
Definition: TMath.h:677
Double_t Log(Double_t x)
Definition: TMath.h:710
Definition: first.py:1