80using std::stringstream;
189 Log() << kDEBUG <<
" successfully(?) reset the method " <<
Endl;
213 fInputData =
new std::vector<TMVA::SVEvent*>(0);
271 Log() << kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: "
273 <<
" --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
286 Log() << kDEBUG <<
"Create event vector"<<
Endl;
289 Int_t nSignal =
Data()->GetNEvtSigTrain();
290 Int_t nBackground =
Data()->GetNEvtBkgdTrain();
297 if(nSignal < nBackground){
299 CBkg = CSig*((
double)nSignal/nBackground);
303 CSig = CBkg*((
double)nSignal/nBackground);
307 for (
Int_t ievnt=0; ievnt<
Data()->GetNEvents(); ievnt++){
308 if (
GetEvent(ievnt)->GetWeight() != 0){
366 Log() << kWARNING <<
fTheKernel <<
" is not a recognised kernel function." <<
Endl;
370 Log()<< kINFO <<
"Building SVM Working Set...with "<<
fInputData->size()<<
" event instances"<<
Endl;
377 Log() << kINFO <<
"Sorry, no computing time forecast available for SVM, please wait ..." <<
Endl;
408 for (std::vector<TMVA::SVEvent*>::iterator veciter=
fSupportVectors->begin();
411 temp[0] = (*veciter)->GetNs();
412 temp[1] = (*veciter)->GetTypeFlag();
413 temp[2] = (*veciter)->GetAlpha();
414 temp[3] = (*veciter)->GetAlpha_p();
416 temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
445 std::vector<Float_t>* svector =
new std::vector<Float_t>(
GetNvar());
458 for (
UInt_t ievt = 0; ievt < fNsupv; ievt++) {
462 typeFlag=(
int)temp[1];
465 for (
UInt_t ivar = 0; ivar <
GetNvar(); ivar++) (*svector)[ivar]=temp[ivar+4];
471 void* maxminnode = supportvectornode;
497 Log() << kWARNING <<
fTheKernel <<
" is not a recognised kernel function." <<
Endl;
530 std::vector<Float_t>* svector =
new std::vector<Float_t>(
GetNvar());
535 for (
UInt_t ievt = 0; ievt < fNsupv; ievt++) {
538 typeFlag = typeTalpha<0?-1:1;
539 alpha = typeTalpha<0?-typeTalpha:typeTalpha;
540 for (
UInt_t ivar = 0; ivar <
GetNvar(); ivar++) istr >> svector->at(ivar);
559 Log() << kFATAL <<
"Unknown kernel function found in weight file!" <<
Endl;
638 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
639 fout <<
" float fBparameter;" << std::endl;
640 fout <<
" int fNOfSuppVec;" << std::endl;
641 fout <<
" static float fAllSuppVectors[][" << fNsupv <<
"];" << std::endl;
642 fout <<
" static float fAlphaTypeCoef[" << fNsupv <<
"];" << std::endl;
644 fout <<
" // Kernel parameter(s) " << std::endl;
645 fout <<
" float fGamma;" << std::endl;
646 fout <<
"};" << std::endl;
647 fout <<
"" << std::endl;
650 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
651 fout <<
"{" << std::endl;
652 fout <<
" fBparameter = " <<
fBparm <<
";" << std::endl;
653 fout <<
" fNOfSuppVec = " << fNsupv <<
";" << std::endl;
654 fout <<
" fGamma = " <<
fGamma <<
";" <<std::endl;
655 fout <<
"}" << std::endl;
659 fout <<
"inline double " << className <<
"::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
660 fout <<
"{" << std::endl;
661 fout <<
" double mvaval = 0; " << std::endl;
662 fout <<
" double temp = 0; " << std::endl;
664 fout <<
" for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
665 fout <<
" temp = 0;" << std::endl;
666 fout <<
" for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
668 fout <<
" temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar]) " << std::endl;
669 fout <<
" * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
670 fout <<
" }" << std::endl;
671 fout <<
" mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;
673 fout <<
" }" << std::endl;
674 fout <<
" mvaval -= fBparameter;" << std::endl;
675 fout <<
" return 1./(1. + exp(mvaval));" << std::endl;
676 fout <<
"}" << std::endl;
677 fout <<
"// Clean up" << std::endl;
678 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
679 fout <<
"{" << std::endl;
680 fout <<
" // nothing to clear " << std::endl;
681 fout <<
"}" << std::endl;
682 fout <<
"" << std::endl;
685 fout <<
"float " << className <<
"::fAlphaTypeCoef[] =" << std::endl;
687 for (
Int_t isv = 0; isv < fNsupv; isv++) {
689 if (isv < fNsupv-1) fout <<
", ";
691 fout <<
" };" << std::endl << std::endl;
693 fout <<
"float " << className <<
"::fAllSuppVectors[][" << fNsupv <<
"] =" << std::endl;
698 for (
Int_t isv = 0; isv < fNsupv; isv++){
700 if (isv < fNsupv-1) fout <<
", ";
703 if (ivar <
GetNvar()-1) fout <<
", " << std::endl;
704 else fout << std::endl;
706 fout <<
"};" << std::endl<< std::endl;
720 Log() <<
"The Support Vector Machine (SVM) builds a hyperplane separating" <<
Endl;
721 Log() <<
"signal and background events (vectors) using the minimal subset of " <<
Endl;
722 Log() <<
"all vectors used for training (support vectors). The extension to" <<
Endl;
723 Log() <<
"the non-linear case is performed by mapping input vectors into a " <<
Endl;
724 Log() <<
"higher-dimensional feature space in which linear separation is " <<
Endl;
725 Log() <<
"possible. The use of the kernel functions thereby eliminates the " <<
Endl;
726 Log() <<
"explicit transformation to the feature space. The implemented SVM " <<
Endl;
727 Log() <<
"algorithm performs the classification tasks using linear, polynomial, " <<
Endl;
728 Log() <<
"Gaussian and sigmoidal kernel functions. The Gaussian kernel allows " <<
Endl;
729 Log() <<
"to apply any discriminant shape in the input space." <<
Endl;
733 Log() <<
"SVM is a general purpose non-linear classification method, which " <<
Endl;
734 Log() <<
"does not require data preprocessing like decorrelation or Principal " <<
Endl;
735 Log() <<
"Component Analysis. It generalises quite well and can handle analyses " <<
Endl;
736 Log() <<
"with large numbers of input variables." <<
Endl;
740 Log() <<
"Optimal performance requires primarily a proper choice of the kernel " <<
Endl;
741 Log() <<
"parameters (the width \"Sigma\" in case of Gaussian kernel) and the" <<
Endl;
742 Log() <<
"cost parameter \"C\". The user must optimise them empirically by running" <<
Endl;
743 Log() <<
"SVM several times with different parameter sets. The time needed for " <<
Endl;
744 Log() <<
"each evaluation scales like the square of the number of training " <<
Endl;
745 Log() <<
"events so that a coarse preliminary tuning should be performed on " <<
Endl;
746 Log() <<
"reduced data sets." <<
Endl;
763 std::map< TString,std::vector<Double_t> > optVars;
768 std::map< TString,std::vector<Double_t> >::iterator iter;
770 std::map<TString,TMVA::Interval*> tuneParameters;
771 std::map<TString,Double_t> tunedParameters;
778 tuneParameters.insert(std::pair<TString,Interval*>(
"Gamma",
new Interval(0.01,1.,100)));
779 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
782 for(iter=optVars.begin(); iter!=optVars.end(); ++iter){
783 if( iter->first ==
"Gamma" || iter->first ==
"C"){
784 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
787 Log() << kWARNING << iter->first <<
" is not a recognised tuneable parameter." <<
Endl;
795 tuneParameters.insert(std::pair<TString,Interval*>(
"Order",
new Interval(1,10,10)));
796 tuneParameters.insert(std::pair<TString,Interval*>(
"Theta",
new Interval(0.01,1.,100)));
797 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
800 for(iter=optVars.begin(); iter!=optVars.end(); ++iter){
801 if( iter->first ==
"Theta" || iter->first ==
"C"){
802 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
804 else if( iter->first ==
"Order"){
805 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
808 Log() << kWARNING << iter->first <<
" is not a recognised tuneable parameter." <<
Endl;
819 string str =
"Gamma_" + s.str();
820 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(0.01,1.,100)));
822 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
824 for(iter=optVars.begin(); iter!=optVars.end(); ++iter){
825 if( iter->first ==
"GammaList"){
829 string str =
"Gamma_" + s.str();
830 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
833 else if( iter->first ==
"C"){
834 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
837 Log() << kWARNING << iter->first <<
" is not a recognised tuneable parameter." <<
Endl;
846 while (std::getline(tempstring,
value,
'*')){
848 tuneParameters.insert(std::pair<TString,Interval*>(
"Gamma",
new Interval(0.01,1.,100)));
850 else if(
value ==
"MultiGauss"){
854 string str =
"Gamma_" + s.str();
855 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(0.01,1.,100)));
858 else if(
value ==
"Polynomial"){
859 tuneParameters.insert(std::pair<TString,Interval*>(
"Order",
new Interval(1,10,10)));
860 tuneParameters.insert(std::pair<TString,Interval*>(
"Theta",
new Interval(0.0,1.0,101)));
863 Log() << kWARNING <<
value <<
" is not a recognised kernel function." <<
Endl;
867 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
872 while (std::getline(tempstring,
value,
'+')){
874 tuneParameters.insert(std::pair<TString,Interval*>(
"Gamma",
new Interval(0.01,1.,100)));
876 else if(
value ==
"MultiGauss"){
880 string str =
"Gamma_" + s.str();
881 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(0.01,1.,100)));
884 else if(
value ==
"Polynomial"){
885 tuneParameters.insert(std::pair<TString,Interval*>(
"Order",
new Interval(1,10,10)));
886 tuneParameters.insert(std::pair<TString,Interval*>(
"Theta",
new Interval(0.0,1.0,101)));
889 Log() << kWARNING <<
value <<
" is not a recognised kernel function." <<
Endl;
893 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
896 Log() << kWARNING <<
fTheKernel <<
" is not a recognised kernel function." <<
Endl;
899 Log() << kINFO <<
" the following SVM parameters will be tuned on the respective *grid*\n" <<
Endl;
900 std::map<TString,TMVA::Interval*>::iterator it;
901 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
902 Log() << kWARNING << it->first <<
Endl;
903 std::ostringstream oss;
904 (it->second)->
Print(oss);
909 tunedParameters=optimize.
optimize();
911 return tunedParameters;
919 std::map<TString,Double_t>::iterator it;
921 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
922 Log() << kWARNING << it->first <<
" = " << it->second <<
Endl;
923 if (it->first ==
"Gamma"){
926 else if(it->first ==
"C"){
930 Log() << kFATAL <<
" SetParameter for " << it->first <<
" not implemented " <<
Endl;
939 string str =
"Gamma_" + s.str();
940 Log() << kWARNING << tuneParameters.find(str)->first <<
" = " << tuneParameters.find(str)->second <<
Endl;
941 fmGamma.push_back(tuneParameters.find(str)->second);
943 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
944 if (it->first ==
"C"){
945 Log() << kWARNING << it->first <<
" = " << it->second <<
Endl;
952 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
953 Log() << kWARNING << it->first <<
" = " << it->second <<
Endl;
954 if (it->first ==
"Order"){
957 else if (it->first ==
"Theta"){
960 else if(it->first ==
"C"){
SetCost (it->second);
962 else if(it->first ==
"Mult"){
966 Log() << kFATAL <<
" SetParameter for " << it->first <<
" not implemented " <<
Endl;
972 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
973 bool foundParam =
false;
974 Log() << kWARNING << it->first <<
" = " << it->second <<
Endl;
978 string str =
"Gamma_" + s.str();
979 if(it->first == str){
984 if (it->first ==
"Gamma"){
988 else if (it->first ==
"Order"){
992 else if (it->first ==
"Theta"){
996 else if (it->first ==
"C"){
SetCost (it->second);
1002 Log() << kFATAL <<
" SetParameter for " << it->first <<
" not implemented " <<
Endl;
1008 Log() << kWARNING <<
fTheKernel <<
" is not a recognised kernel function." <<
Endl;
1019 std::stringstream tempstring(mg);
1021 while (tempstring >>
value){
1024 if (tempstring.peek() ==
','){
1025 tempstring.ignore();
1033 std::ostringstream tempstring;
1034 for(
UInt_t i = 0;
i<gammas.size(); ++
i){
1035 tempstring << gammas.at(
i);
1036 if(
i!=(gammas.size()-1)){
1056 std::vector<TMVA::SVKernelFunction::EKernelType> kernelsList;
1057 std::stringstream tempstring(multiKernels);
1060 while (std::getline(tempstring,
value,
'*')){
1062 else if(
value ==
"MultiGauss"){
1070 Log() << kWARNING <<
value <<
" is not a recognised kernel function." <<
Endl;
1075 else if(kernel==
"Sum"){
1076 while (std::getline(tempstring,
value,
'+')){
1078 else if(
value ==
"MultiGauss"){
1086 Log() << kWARNING <<
value <<
" is not a recognised kernel function." <<
Endl;
1092 Log() << kWARNING <<
"Unable to split MultiKernels. Delimiters */+ required." <<
Endl;
1108 std::map< TString,std::vector<Double_t> > optVars;
1109 std::stringstream tempstring(
fTune);
1111 while (std::getline(tempstring,
value,
',')){
1112 unsigned first =
value.find(
'[')+1;
1113 unsigned last =
value.find_last_of(
']');
1114 std::string optParam =
value.substr(0,first-1);
1115 std::stringstream strNew (
value.substr(first,last-first));
1117 std::vector<Double_t> tempVec;
1119 while (strNew >> optInterval){
1120 tempVec.push_back(optInterval);
1121 if (strNew.peek() ==
';'){
1126 if(
i != 3 &&
i == tempVec.size()){
1127 if(optParam ==
"C" || optParam ==
"Gamma" || optParam ==
"GammaList" || optParam ==
"Theta"){
1130 tempVec.push_back(0.01);
1132 tempVec.push_back(1.);
1134 tempVec.push_back(100);
1137 else if(optParam ==
"Order"){
1140 tempVec.push_back(1);
1142 tempVec.push_back(10);
1144 tempVec.push_back(10);
1148 Log() << kWARNING << optParam <<
" is not a recognised tuneable parameter." <<
Endl;
1152 optVars.insert(std::pair<
TString,std::vector<Double_t> >(optParam,tempVec));
1174 if(lossFunction ==
"hinge"){
1177 else if(lossFunction ==
"exp"){
1180 else if(lossFunction ==
"binomial"){
1184 Log() << kWARNING << lossFunction <<
" is not a recognised loss function." <<
Endl;
1189 if(lossFunction ==
"hinge"){
1192 else if(lossFunction ==
"exp"){
1195 else if(lossFunction ==
"binomial"){
1199 Log() << kWARNING << lossFunction <<
" is not a recognised loss function." <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void w
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
void Print(GNN_Data &d, std::string txt="")
TVectorT< Double_t > TVectorD
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Class that contains all the data information.
std::vector< VariableInfo > & GetVariableInfos()
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Float_t GetTarget(UInt_t itgt) const
The TMVA::Interval Class.
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
Double_t GetXmin(Int_t ivar) const
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString GetMethodTypeName() const
const char * GetName() const
Types::EAnalysisType GetAnalysisType() const
Bool_t IgnoreEventsWithNegWeightsInTraining() const
const TString & GetMethodName() const
UInt_t GetNEvents() const
Bool_t DoRegression() const
std::vector< Float_t > * fRegressionReturnVal
const Event * GetEvent() const
DataSetInfo & DataInfo() const
void SetNormalised(Bool_t norm)
Double_t GetXmax(Int_t ivar) const
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
IPythonInteractive * fInteractive
temporary dataset used when evaluating on a different data (used by MethodCategory::GetMvaValues)
SMO Platt's SVM classifier with Keerthi & Shevade improvements.
Double_t getLoss(TString lossFunction)
getLoss: calculates the loss for the testing dataset.
Float_t fTolerance
tolerance parameter
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
Set the tuning parameters according to the argument.
TVectorD * fMaxVars
for normalization //is it still needed??
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
returns MVA value for given event
TVectorD * fMinVars
for normalization //is it still needed??
void DeclareOptions()
declare options available for this method
std::vector< TString > fVarNames
void WriteWeightsToStream(TFile &fout) const
TODO: write it. Write training sample (TTree) to file.
void SetMGamma(std::string &mg)
Takes as input a string of values for multigaussian gammas and splits it, filling the gamma vector re...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
SVM can handle classification with 2 classes and regression with one regression-target.
SVKernelFunction * fSVKernelFunction
kernel function
Float_t fBparm
free plane coefficient
void ReadWeightsFromStream(std::istream &istr)
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="Minuit")
Optimize tuning parameters. This is used to optimise the kernel function parameters and cost.
Float_t fDoubleSigmaSquared
for RBF Kernel
void GetMGamma(const std::vector< float > &gammas)
Produces GammaList string for multigaussian kernel to be written to xml file.
void AddWeightsXMLTo(void *parent) const
write configuration to xml file
Float_t fNumVars
number of input variables for multi-gaussian
Int_t fOrder
for Polynomial Kernel ( polynomial order )
void SetTheta(Double_t t)
void SetGamma(Double_t g)
std::vector< TMVA::SVEvent * > * fSupportVectors
contains support vectors
Float_t fKappa
for Sigmoidal Kernel
Float_t fGamma
RBF Kernel parameter.
void SetOrder(Double_t o)
UShort_t fNSubSets
nr of subsets, default 1
std::map< TString, std::vector< Double_t > > GetTuningOptions()
GetTuningOptions Function to allow for ranges and number of steps (for scan) when optimising kernel f...
void ReadWeightsFromXML(void *wghtnode)
std::vector< Float_t > fmGamma
vector of gammas for multi-gaussian kernel
void ProcessOptions()
option post processing (if necessary)
void Train(void)
Train SVM.
std::vector< TMVA::SVEvent * > * fInputData
vector of training data in SVM format
void Init(void)
default initialisation
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
virtual ~MethodSVM(void)
destructor
TString fTheKernel
kernel name
std::string fMultiKernels
Float_t fTheta
for Sigmoidal Kernel
const std::vector< Float_t > & GetRegressionValues()
MethodSVM(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
std::string fTune
Specify parameters to be tuned.
void GetHelpMessage() const
get help message text
UInt_t fMaxIter
max number of iterations
std::vector< TMVA::SVKernelFunction::EKernelType > MakeKernelList(std::string multiKernels, TString kernel)
MakeKernelList Function providing string manipulation for product or sum of kernels functions to take...
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
SVWorkingSet * fWgSet
svm working set
std::map< TString, Double_t > optimize()
Class that is the base-class for a vector of result.
Event class for Support Vector Machine.
Kernel for Support Vector Machine.
Working class for Support Vector Machine.
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Singleton class for Global types used by TMVA.
MsgLogger & Endl(MsgLogger &ml)
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Double_t Log(Double_t x)
Returns the natural logarithm of x.