77using std::stringstream;
199 <<
"<CreateFormula> Formula contains expression: \"" <<
TString::Format(
"(%i)",ipar) <<
"\", "
200 <<
"which cannot be attributed to a parameter; "
201 <<
"it may be that the number of variable ranges given via \"ParRanges\" "
202 <<
"does not match the number of parameters in the formula expression, please verify!"
215 <<
"<CreateFormula> Formula contains expression: \"" <<
TString::Format(
"x%i",ivar) <<
"\", "
216 <<
"which cannot be attributed to an input variable" <<
Endl;
221 Log() << kDEBUG <<
"Creating and compiling formula" <<
Endl;
229 Log() << kFATAL <<
"<ProcessOptions> Formula expression could not be properly compiled" <<
Endl;
233 Log() << kFATAL <<
"<ProcessOptions> Dubious number of parameters in formula expression: "
251 Log() << kFATAL <<
"<ProcessOptions> Mismatch in parameter string: "
252 <<
"the number of parameters: " <<
fNPars <<
" != ranges defined: "
253 << parList->
GetSize() <<
"; the format of the \"ParRanges\" string "
254 <<
"must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
255 <<
"where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
256 <<
"each parameter defined in the function string must have a corresponding rang."
266 Ssiz_t istr = str.First(
',' );
268 TString pmaxS(str(istr+1,str.Length()-2-istr));
270 stringstream stmin;
Float_t pmin=0; stmin << pminS.
Data(); stmin >> pmin;
271 stringstream stmax;
Float_t pmax=0; stmax << pmaxS.
Data(); stmax >> pmax;
274 if (
TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
275 if (pmin > pmax)
Log() << kFATAL <<
"<ProcessOptions> max > min in interval for parameter: ["
276 << ipar <<
"] : [" << pmin <<
", " << pmax <<
"] " <<
Endl;
278 Log() << kINFO <<
"Create parameter interval for parameter " << ipar <<
" : [" << pmin <<
"," << pmax <<
"]" <<
Endl;
317 Log() << kFATAL <<
"<Train> Do not understand fit method:" <<
fFitMethod <<
Endl;
320 fFitter->CheckForUnusedOptions();
388 Log() << kFATAL <<
"<Train> Troubles in sum of weights: "
393 Log() << kFATAL <<
"<Train> Troubles in sum of weights: "
399 for (std::vector<Interval*>::const_iterator parIt =
fParRange.begin(); parIt !=
fParRange.end(); ++parIt) {
400 fBestPars.push_back( (*parIt)->GetMean() );
424 Log() << kHEADER <<
"Results for parameter fit using \"" << fitter <<
"\" fitter:" <<
Endl;
425 std::vector<TString> parNames;
426 for (
UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back(
TString::Format(
"Par(%i)",ipar ) );
429 Log() <<
"Value of estimator at minimum: " << estimator <<
Endl;
453 estimator[2] += deviation * ev->
GetWeight();
456 estimator[2] /= sumOfWeights[2];
475 estimator[2] /= sumOfWeights[2];
484 desired = (
DataInfo().IsSignal(ev) ? 1.0 : 0.0);
489 estimator[0] /= sumOfWeights[0];
490 estimator[1] /= sumOfWeights[1];
492 return estimator[0] + estimator[1];
503 for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
505 fFormula->SetParameter( ipar, (*it) );
557 std::vector<Float_t> temp;
565 for(
UInt_t iClass=0; iClass<nClasses; iClass++){
567 for(
UInt_t j=0;j<nClasses;j++){
569 norm+=exp(temp[j]-temp[iClass]);
571 (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
595 values.push_back(
value );
639 if(
gTools().HasAttr( wghtnode,
"NDim")) {
676 fout <<
" double fParameter[" <<
fNPars <<
"];" << std::endl;
677 fout <<
"};" << std::endl;
678 fout <<
"" << std::endl;
679 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
680 fout <<
"{" << std::endl;
682 fout <<
" fParameter[" << ipar <<
"] = " <<
fBestPars[ipar] <<
";" << std::endl;
684 fout <<
"}" << std::endl;
686 fout <<
"inline double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
687 fout <<
"{" << std::endl;
688 fout <<
" // interpret the formula" << std::endl;
701 fout <<
" double retval = " << str <<
";" << std::endl;
703 fout <<
" return retval; " << std::endl;
704 fout <<
"}" << std::endl;
706 fout <<
"// Clean up" << std::endl;
707 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
708 fout <<
"{" << std::endl;
709 fout <<
" // nothing to clear" << std::endl;
710 fout <<
"}" << std::endl;
724 Log() <<
"The function discriminant analysis (FDA) is a classifier suitable " <<
Endl;
725 Log() <<
"to solve linear or simple nonlinear discrimination problems." <<
Endl;
727 Log() <<
"The user provides the desired function with adjustable parameters" <<
Endl;
728 Log() <<
"via the configuration option string, and FDA fits the parameters to" <<
Endl;
729 Log() <<
"it, requiring the signal (background) function value to be as close" <<
Endl;
730 Log() <<
"as possible to 1 (0). Its advantage over the more involved and" <<
Endl;
731 Log() <<
"automatic nonlinear discriminators is the simplicity and transparency " <<
Endl;
732 Log() <<
"of the discrimination expression. A shortcoming is that FDA will" <<
Endl;
733 Log() <<
"underperform for involved problems with complicated, phase space" <<
Endl;
734 Log() <<
"dependent nonlinear correlations." <<
Endl;
736 Log() <<
"Please consult the Users Guide for the format of the formula string" <<
Endl;
737 Log() <<
"and the allowed parameter ranges:" <<
Endl;
738 if (
gConfig().WriteOptionsReference()) {
739 Log() <<
"<a href=\"https://github.com/root-project/root/blob/master/documentation/tmva/UsersGuide/TMVAUsersGuide.pdf\">"
740 <<
"TMVAUsersGuide.pdf</a>" <<
Endl;
742 else Log() <<
"documentation/tmva/UsersGuide/TMVAUsersGuide.pdf" <<
Endl;
746 Log() <<
"The FDA performance depends on the complexity and fidelity of the" <<
Endl;
747 Log() <<
"user-defined discriminator function. As a general rule, it should" <<
Endl;
748 Log() <<
"be able to reproduce the discrimination power of any linear" <<
Endl;
749 Log() <<
"discriminant analysis. To reach into the nonlinear domain, it is" <<
Endl;
750 Log() <<
"useful to inspect the correlation profiles of the input variables," <<
Endl;
751 Log() <<
"and add quadratic and higher polynomial terms between variables as" <<
Endl;
752 Log() <<
"necessary. Comparison with more involved nonlinear classifiers can" <<
Endl;
753 Log() <<
"be used as a guide." <<
Endl;
757 Log() <<
"Depending on the function used, the choice of \"FitMethod\" is" <<
Endl;
758 Log() <<
"crucial for getting valuable solutions with FDA. As a guideline it" <<
Endl;
759 Log() <<
"is recommended to start with \"FitMethod=MINUIT\". When more complex" <<
Endl;
760 Log() <<
"functions are used where MINUIT does not converge to reasonable" <<
Endl;
761 Log() <<
"results, the user should switch to non-gradient FitMethods such" <<
Endl;
762 Log() <<
"as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" <<
Endl;
763 Log() <<
"useful to combine GA (or MC) with MINUIT by setting the option" <<
Endl;
764 Log() <<
"\"Converger=MINUIT\". GA (MC) will then set the starting parameters" <<
Endl;
765 Log() <<
"for MINUIT such that the basic quality of GA (MC) of finding global" <<
Endl;
766 Log() <<
"minima is combined with the efficacy of MINUIT of finding local" <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void w
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
virtual Int_t GetSize() const
Return the capacity of the collection, i.e. the current total amount of space that has been allocated so far.
TObject * At(Int_t idx) const override
Returns the object at position idx. Returns 0 if idx is out of range.
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
const TString & GetOptions() const
void SetOptions(const TString &s)
Class that contains all the data information.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Double_t GetWeight() const
return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Float_t GetTarget(UInt_t itgt) const
Fitter using a Genetic Algorithm.
IFitterTarget()
constructor
The TMVA::Interval Class.
Fitter using Monte Carlo sampling of parameters.
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
Bool_t DoMulticlass() const
const char * GetName() const
UInt_t GetNEvents() const
Bool_t DoRegression() const
std::vector< Float_t > * fRegressionReturnVal
std::vector< Float_t > * fMulticlassReturnVal
const Event * GetEvent() const
DataSetInfo & DataInfo() const
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Function discriminant analysis (FDA).
void Train(void)
FDA training.
TString fFormulaStringT
string with function
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/targets)
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised)
virtual ~MethodFDA(void)
destructor
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
virtual const std::vector< Float_t > & GetMulticlassValues()
Double_t fSumOfWeightsBkg
sum of weights (background)
Int_t fOutputDimensions
number of output values
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
void Init(void)
default initialisation
void ClearAll()
delete and clear all class members
std::vector< Interval * > fParRange
ranges of parameters
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
Double_t fSumOfWeightsSig
sum of weights (signal)
TString fParRangeStringP
string with ranges of parameters
TFormula * fFormula
the discrimination function
virtual const std::vector< Float_t > & GetRegressionValues()
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
std::vector< Double_t > fBestPars
the pars that optimise (minimise) the estimator
IFitterTarget * fConvergerFitter
intermediate fitter
FitterBase * fFitter
the fitter used in the training
Double_t fSumOfWeights
sum of weights
TString fParRangeStringT
string with ranges of parameters
TString fFitMethod
estimator optimisation method
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
void DeclareOptions()
define the options (their key words) that can be set in the option string
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
returns MVA value for given event
UInt_t fNPars
number of parameters
TString fConverger
fit method uses fConverger as intermediate step to converge into local minima
void GetHelpMessage() const
get help message text
TString fFormulaStringP
string with function
Fitter using a Simulated Annealing Algorithm.
Singleton class for Global types used by TMVA.
Collectable string class.
const char * Data() const
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
MsgLogger & Endl(MsgLogger &ml)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.