80 using std::stringstream;
102 , fSVKernelFunction(0)
105 , fDoubleSigmaSquared(0)
117 fNumVars = theData.GetVariableInfos().size();
118 for(
int i=0; i<fNumVars; i++){
119 fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
137 , fSVKernelFunction(0)
140 , fDoubleSigmaSquared(0)
189 Log() << kDEBUG <<
" successfully(?) reset the method " <<
Endl;
213 fInputData =
new std::vector<TMVA::SVEvent*>(0);
271 Log() << kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: " 273 <<
" --> please remove \"IgnoreNegWeightsInTraining\" option from booking string." 286 Log() << kDEBUG <<
"Create event vector"<<
Endl;
297 if(nSignal < nBackground){
299 CBkg = CSig*((double)nSignal/nBackground);
303 CSig = CBkg*((double)nSignal/nBackground);
366 Log() << kWARNING <<
fTheKernel <<
" is not a recognised kernel function." <<
Endl;
370 Log()<< kINFO <<
"Building SVM Working Set...with "<<
fInputData->size()<<
" event instances"<<
Endl;
377 Log() << kINFO <<
"Sorry, no computing time forecast available for SVM, please wait ..." <<
Endl;
408 for (std::vector<TMVA::SVEvent*>::iterator veciter=
fSupportVectors->begin();
411 temp[0] = (*veciter)->GetNs();
412 temp[1] = (*veciter)->GetTypeFlag();
413 temp[2] = (*veciter)->GetAlpha();
414 temp[3] = (*veciter)->GetAlpha_p();
416 temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
445 std::vector<Float_t>* svector =
new std::vector<Float_t>(
GetNvar());
458 for (
UInt_t ievt = 0; ievt < fNsupv; ievt++) {
462 typeFlag=(int)temp[1];
465 for (
UInt_t ivar = 0; ivar <
GetNvar(); ivar++) (*svector)[ivar]=temp[ivar+4];
471 void* maxminnode = supportvectornode;
497 Log() << kWARNING <<
fTheKernel <<
" is not a recognised kernel function." <<
Endl;
530 std::vector<Float_t>* svector =
new std::vector<Float_t>(
GetNvar());
535 for (
UInt_t ievt = 0; ievt < fNsupv; ievt++) {
538 typeFlag = typeTalpha<0?-1:1;
539 alpha = typeTalpha<0?-typeTalpha:typeTalpha;
540 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> svector->at(ivar);
545 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMaxVars)[ivar];
547 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMinVars)[ivar];
559 Log() << kFATAL <<
"Unknown kernel function found in weight file!" <<
Endl;
638 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
639 fout <<
" float fBparameter;" << std::endl;
640 fout <<
" int fNOfSuppVec;" << std::endl;
641 fout <<
" static float fAllSuppVectors[][" << fNsupv <<
"];" << std::endl;
642 fout <<
" static float fAlphaTypeCoef[" << fNsupv <<
"];" << std::endl;
644 fout <<
" // Kernel parameter(s) " << std::endl;
645 fout <<
" float fGamma;" << std::endl;
646 fout <<
"};" << std::endl;
647 fout <<
"" << std::endl;
650 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
651 fout <<
"{" << std::endl;
652 fout <<
" fBparameter = " <<
fBparm <<
";" << std::endl;
653 fout <<
" fNOfSuppVec = " << fNsupv <<
";" << std::endl;
654 fout <<
" fGamma = " <<
fGamma <<
";" <<std::endl;
655 fout <<
"}" << std::endl;
659 fout <<
"inline double " << className <<
"::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
660 fout <<
"{" << std::endl;
661 fout <<
" double mvaval = 0; " << std::endl;
662 fout <<
" double temp = 0; " << std::endl;
664 fout <<
" for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
665 fout <<
" temp = 0;" << std::endl;
666 fout <<
" for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
668 fout <<
" temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar]) " << std::endl;
669 fout <<
" * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
670 fout <<
" }" << std::endl;
671 fout <<
" mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;
673 fout <<
" }" << std::endl;
674 fout <<
" mvaval -= fBparameter;" << std::endl;
675 fout <<
" return 1./(1. + exp(mvaval));" << std::endl;
676 fout <<
"}" << std::endl;
677 fout <<
"// Clean up" << std::endl;
678 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
679 fout <<
"{" << std::endl;
680 fout <<
" // nothing to clear " << std::endl;
681 fout <<
"}" << std::endl;
682 fout <<
"" << std::endl;
685 fout <<
"float " << className <<
"::fAlphaTypeCoef[] =" << std::endl;
687 for (
Int_t isv = 0; isv < fNsupv; isv++) {
689 if (isv < fNsupv-1) fout <<
", ";
691 fout <<
" };" << std::endl << std::endl;
693 fout <<
"float " << className <<
"::fAllSuppVectors[][" << fNsupv <<
"] =" << std::endl;
698 for (
Int_t isv = 0; isv < fNsupv; isv++){
700 if (isv < fNsupv-1) fout <<
", ";
703 if (ivar <
GetNvar()-1) fout <<
", " << std::endl;
704 else fout << std::endl;
706 fout <<
"};" << std::endl<< std::endl;
720 Log() <<
"The Support Vector Machine (SVM) builds a hyperplane separating" <<
Endl;
721 Log() <<
"signal and background events (vectors) using the minimal subset of " <<
Endl;
722 Log() <<
"all vectors used for training (support vectors). The extension to" <<
Endl;
723 Log() <<
"the non-linear case is performed by mapping input vectors into a " <<
Endl;
724 Log() <<
"higher-dimensional feature space in which linear separation is " <<
Endl;
725 Log() <<
"possible. The use of the kernel functions thereby eliminates the " <<
Endl;
726 Log() <<
"explicit transformation to the feature space. The implemented SVM " <<
Endl;
727 Log() <<
"algorithm performs the classification tasks using linear, polynomial, " <<
Endl;
728 Log() <<
"Gaussian and sigmoidal kernel functions. The Gaussian kernel allows " <<
Endl;
729 Log() <<
"to apply any discriminant shape in the input space." <<
Endl;
733 Log() <<
"SVM is a general purpose non-linear classification method, which " <<
Endl;
734 Log() <<
"does not require data preprocessing like decorrelation or Principal " <<
Endl;
735 Log() <<
"Component Analysis. It generalises quite well and can handle analyses " <<
Endl;
736 Log() <<
"with large numbers of input variables." <<
Endl;
740 Log() <<
"Optimal performance requires primarily a proper choice of the kernel " <<
Endl;
741 Log() <<
"parameters (the width \"Sigma\" in case of Gaussian kernel) and the" <<
Endl;
742 Log() <<
"cost parameter \"C\". The user must optimise them empirically by running" <<
Endl;
743 Log() <<
"SVM several times with different parameter sets. The time needed for " <<
Endl;
744 Log() <<
"each evaluation scales like the square of the number of training " <<
Endl;
745 Log() <<
"events so that a coarse preliminary tuning should be performed on " <<
Endl;
746 Log() <<
"reduced data sets." <<
Endl;
763 std::map< TString,std::vector<Double_t> > optVars;
768 std::map< TString,std::vector<Double_t> >::iterator iter;
770 std::map<TString,TMVA::Interval*> tuneParameters;
771 std::map<TString,Double_t> tunedParameters;
778 tuneParameters.insert(std::pair<TString,Interval*>(
"Gamma",
new Interval(0.01,1.,100)));
779 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
782 for(iter=optVars.begin(); iter!=optVars.end(); iter++){
783 if( iter->first ==
"Gamma" || iter->first ==
"C"){
784 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
787 Log() << kWARNING << iter->first <<
" is not a recognised tuneable parameter." <<
Endl;
795 tuneParameters.insert(std::pair<TString,Interval*>(
"Order",
new Interval(1,10,10)));
796 tuneParameters.insert(std::pair<TString,Interval*>(
"Theta",
new Interval(0.01,1.,100)));
797 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
800 for(iter=optVars.begin(); iter!=optVars.end(); iter++){
801 if( iter->first ==
"Theta" || iter->first ==
"C"){
802 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
804 else if( iter->first ==
"Order"){
805 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
808 Log() << kWARNING << iter->first <<
" is not a recognised tuneable parameter." <<
Endl;
819 string str =
"Gamma_" + s.str();
820 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(0.01,1.,100)));
822 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
824 for(iter=optVars.begin(); iter!=optVars.end(); iter++){
825 if( iter->first ==
"GammaList"){
829 string str =
"Gamma_" + s.str();
830 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
833 else if( iter->first ==
"C"){
834 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
837 Log() << kWARNING << iter->first <<
" is not a recognised tuneable parameter." <<
Endl;
846 while (std::getline(tempstring,value,
'*')){
848 tuneParameters.insert(std::pair<TString,Interval*>(
"Gamma",
new Interval(0.01,1.,100)));
850 else if(value ==
"MultiGauss"){
854 string str =
"Gamma_" + s.str();
855 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(0.01,1.,100)));
858 else if(value ==
"Polynomial"){
859 tuneParameters.insert(std::pair<TString,Interval*>(
"Order",
new Interval(1,10,10)));
860 tuneParameters.insert(std::pair<TString,Interval*>(
"Theta",
new Interval(0.0,1.0,101)));
863 Log() << kWARNING << value <<
" is not a recognised kernel function." <<
Endl;
867 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
872 while (std::getline(tempstring,value,
'+')){
874 tuneParameters.insert(std::pair<TString,Interval*>(
"Gamma",
new Interval(0.01,1.,100)));
876 else if(value ==
"MultiGauss"){
880 string str =
"Gamma_" + s.str();
881 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(0.01,1.,100)));
884 else if(value ==
"Polynomial"){
885 tuneParameters.insert(std::pair<TString,Interval*>(
"Order",
new Interval(1,10,10)));
886 tuneParameters.insert(std::pair<TString,Interval*>(
"Theta",
new Interval(0.0,1.0,101)));
889 Log() << kWARNING << value <<
" is not a recognised kernel function." <<
Endl;
893 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
896 Log() << kWARNING <<
fTheKernel <<
" is not a recognised kernel function." <<
Endl;
899 Log() << kINFO <<
" the following SVM parameters will be tuned on the respective *grid*\n" <<
Endl;
900 std::map<TString,TMVA::Interval*>::iterator it;
901 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
902 Log() << kWARNING << it->first <<
Endl;
903 std::ostringstream oss;
904 (it->second)->
Print(oss);
909 tunedParameters=optimize.
optimize();
911 return tunedParameters;
919 std::map<TString,Double_t>::iterator it;
921 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
922 Log() << kWARNING << it->first <<
" = " << it->second <<
Endl;
923 if (it->first ==
"Gamma"){
926 else if(it->first ==
"C"){
930 Log() << kFATAL <<
" SetParameter for " << it->first <<
" not implemented " <<
Endl;
939 string str =
"Gamma_" + s.str();
940 Log() << kWARNING << tuneParameters.find(str)->first <<
" = " << tuneParameters.find(str)->second <<
Endl;
941 fmGamma.push_back(tuneParameters.find(str)->second);
943 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
944 if (it->first ==
"C"){
945 Log() << kWARNING << it->first <<
" = " << it->second <<
Endl;
952 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
953 Log() << kWARNING << it->first <<
" = " << it->second <<
Endl;
954 if (it->first ==
"Order"){
957 else if (it->first ==
"Theta"){
960 else if(it->first ==
"C"){
SetCost (it->second);
962 else if(it->first ==
"Mult"){
966 Log() << kFATAL <<
" SetParameter for " << it->first <<
" not implemented " <<
Endl;
972 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
973 bool foundParam =
false;
974 Log() << kWARNING << it->first <<
" = " << it->second <<
Endl;
978 string str =
"Gamma_" + s.str();
979 if(it->first == str){
984 if (it->first ==
"Gamma"){
988 else if (it->first ==
"Order"){
992 else if (it->first ==
"Theta"){
996 else if (it->first ==
"C"){
SetCost (it->second);
1002 Log() << kFATAL <<
" SetParameter for " << it->first <<
" not implemented " <<
Endl;
1008 Log() << kWARNING <<
fTheKernel <<
" is not a recognised kernel function." <<
Endl;
1019 std::stringstream tempstring(mg);
1021 while (tempstring >> value){
1024 if (tempstring.peek() ==
','){
1025 tempstring.ignore();
1033 std::ostringstream tempstring;
1034 for(
UInt_t i = 0; i<gammas.size(); ++i){
1035 tempstring << gammas.at(i);
1036 if(i!=(gammas.size()-1)){
1056 std::vector<TMVA::SVKernelFunction::EKernelType> kernelsList;
1057 std::stringstream tempstring(multiKernels);
1060 while (std::getline(tempstring,value,
'*')){
1062 else if(value ==
"MultiGauss"){
1070 Log() << kWARNING << value <<
" is not a recognised kernel function." <<
Endl;
1075 else if(kernel==
"Sum"){
1076 while (std::getline(tempstring,value,
'+')){
1078 else if(value ==
"MultiGauss"){
1086 Log() << kWARNING << value <<
" is not a recognised kernel function." <<
Endl;
1092 Log() << kWARNING <<
"Unable to split MultiKernels. Delimiters */+ required." <<
Endl;
1108 std::map< TString,std::vector<Double_t> > optVars;
1109 std::stringstream tempstring(
fTune);
1111 while (std::getline(tempstring,value,
',')){
1112 unsigned first = value.find(
'[')+1;
1113 unsigned last = value.find_last_of(
']');
1114 std::string optParam = value.substr(0,first-1);
1115 std::stringstream strNew (value.substr(first,last-first));
1117 std::vector<Double_t> tempVec;
1119 while (strNew >> optInterval){
1120 tempVec.push_back(optInterval);
1121 if (strNew.peek() ==
';'){
1126 if(i != 3 && i == tempVec.size()){
1127 if(optParam ==
"C" || optParam ==
"Gamma" || optParam ==
"GammaList" || optParam ==
"Theta"){
1130 tempVec.push_back(0.01);
1132 tempVec.push_back(1.);
1134 tempVec.push_back(100);
1137 else if(optParam ==
"Order"){
1140 tempVec.push_back(1);
1142 tempVec.push_back(10);
1144 tempVec.push_back(10);
1148 Log() << kWARNING << optParam <<
" is not a recognised tuneable parameter." <<
Endl;
1152 optVars.insert(std::pair<
TString,std::vector<Double_t> >(optParam,tempVec));
1174 if(lossFunction ==
"hinge"){
1177 else if(lossFunction ==
"exp"){
1180 else if(lossFunction ==
"binomial"){
1184 Log() << kWARNING << lossFunction <<
" is not a recognised loss function." <<
Endl;
1189 if(lossFunction ==
"hinge"){
1192 else if(lossFunction ==
"exp"){
1195 else if(lossFunction ==
"binomial"){
1199 Log() << kWARNING << lossFunction <<
" is not a recognised loss function." <<
Endl;
void Train(void)
Train SVM.
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
std::vector< TMVA::SVKernelFunction::EKernelType > MakeKernelList(std::string multiKernels, TString kernel)
MakeKernelList Function providing string manipulation for product or sum of kernels functions to take...
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
Kernel for Support Vector Machine.
MsgLogger & Endl(MsgLogger &ml)
Singleton class for Global types used by TMVA.
Float_t fDoubleSigmaSquared
std::map< TString, std::vector< Double_t > > GetTuningOptions()
GetTuningOptions Function to allow for ranges and number of steps (for scan) when optimising kernel f...
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Virtual base Class for all MVA method.
std::string fMultiKernels
void Train(UInt_t nIter=1000)
train the SVM
std::vector< TMVA::SVEvent * > * fInputData
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
void AddWeightsXMLTo(void *parent) const
write configuration to xml file
void ProcessOptions()
option post processing (if necessary)
void setCompatibilityParams(EKernelType k, UInt_t order, Float_t theta, Float_t kappa)
set old options for compatibility mode
std::vector< TString > fVarNames
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
Set the tuning parameters according to the argument.
SMO Platt's SVM classifier with Keerthi & Shavade improvements.
std::vector< Float_t > fmGamma
void SetOrder(Double_t o)
const Event * GetEvent() const
void GetMGamma(const std::vector< float > &gammas)
Produces GammaList string for multigaussian kernel to be written to xml file.
Double_t GetXmin(Int_t ivar) const
DataSetInfo & DataInfo() const
Bool_t DoRegression() const
Class that contains all the data information.
void DeclareOptions()
declare options available for this method
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
TVectorT< Double_t > TVectorD
virtual void Print(Option_t *option="") const
Print TNamed name and title.
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Double_t GetXmax(Int_t ivar) const
void SetGamma(Double_t g)
Float_t GetTarget(UInt_t itgt) const
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
void ReadWeightsFromXML(void *wghtnode)
void ReadWeightsFromStream(std::istream &istr)
MethodSVM(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
const char * GetName() const
std::map< TString, Double_t > optimize()
The TMVA::Interval Class.
void Init(void)
default initialisation
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
SVKernelFunction * fSVKernelFunction
void SetTheta(Double_t t)
const TString & GetMethodName() const
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="Minuit")
Optimize Tuning Parameters This is used to optimise the kernel function parameters and cost...
Event class for Support Vector Machine.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
SVM can handle classification with 2 classes and regression with one regression-target.
void SetNormalised(Bool_t norm)
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
std::vector< TMVA::SVEvent * > * fSupportVectors
void WriteWeightsToStream(TFile &fout) const
TODO write IT write training sample (TTree) to file.
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Double_t getLoss(TString lossFunction)
getLoss Calculates loss for testing dataset.
std::vector< TMVA::SVEvent * > * GetSupportVectors()
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
void SetCurrentType(Types::ETreeType type) const
void GetHelpMessage() const
get help message text
Working class for Support Vector Machine.
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
IPythonInteractive * fInteractive
virtual ~MethodSVM(void)
destructor
TString GetMethodTypeName() const
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Bool_t IsSignal(const Event *ev) const
std::vector< Float_t > * fRegressionReturnVal
Types::EAnalysisType GetAnalysisType() const
void SetIPythonInteractive(bool *ExitFromTraining, UInt_t *fIPyCurrentIter_)
Timing information for training and evaluation of MVA methods.
void SetMGamma(std::string &mg)
Takes as input a string of values for multigaussian gammas and splits it, filling the gamma vector re...
std::vector< VariableInfo > & GetVariableInfos()
Class that is the base-class for a vector of result.
Float_t Evaluate(SVEvent *ev1, SVEvent *ev2)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const std::vector< Float_t > & GetRegressionValues()