148 fFitMethod ( kUseGeneticAlgorithm ),
149 fEffMethod ( kUseEventSelection ),
174 fVarHistS_smooth( 0 ),
175 fVarHistB_smooth( 0 ),
186 const TString& theWeightFile) :
334 DeclareOptionRef(
fFitMethodS =
"GA",
"FitMethod",
"Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
378 Log() <<
kWARNING <<
"Normalisation of the input variables for cut optimisation is not" <<
Endl;
379 Log() <<
kWARNING <<
"supported because this provides intransparent cut values, and no" <<
Endl;
380 Log() <<
kWARNING <<
"improvement in the performance of the algorithm." <<
Endl;
381 Log() <<
kWARNING <<
"Please remove \"Normalise\" option from booking option string" <<
Endl;
382 Log() <<
kWARNING <<
"==> Will reset normalisation flag to \"False\"" <<
Endl;
387 Log() <<
kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: " 389 <<
" --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string." 399 Log() <<
kWARNING <<
"poor performance of MINUIT in MethodCuts; preferred fit method: GA" <<
Endl;
409 Log() <<
kINFO <<
Form(
"Use optimization method: \"%s\"",
414 Log() <<
kINFO <<
Form(
"Use efficiency computation method: \"%s\"",
431 <<
"\' for fit parameter option " <<
Form(
"VarProp[%i]",ivar) <<
Endl;
433 (*fFitParams)[ivar] = theFitP;
437 <<
"\" cuts for variable: " <<
"'" << (*fInputVars)[ivar] <<
"'" <<
Endl;
451 Log() <<
kFATAL <<
"<Eval_Cuts> fCutMin/Max have zero pointer. " 452 <<
"Did you book Cuts ?" <<
Endl;
461 if (ibin < 0 ) ibin = 0;
469 return passed ? 1. : 0. ;
479 std::vector<Double_t> cutsMin;
480 std::vector<Double_t> cutsMax;
486 std::vector<TString>* varVec = 0;
489 varVec =
new std::vector<TString>;
490 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
491 varVec->push_back(
DataInfo().GetVariableInfo(ivar).GetLabel() );
500 varVec =
new std::vector<TString>;
501 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
502 varVec->push_back(
DataInfo().GetVariableInfo(ivar).GetLabel() +
" [transformed]" );
507 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
508 if ((
UInt_t)(*varVec)[ivar].Length() > maxL) maxL = (*varVec)[ivar].Length();
510 UInt_t maxLine = 20+maxL+16;
512 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
514 Log() <<
kHEADER <<
"Cut values for requested signal efficiency: " << trueEffS <<
Endl;
517 Log() <<
kINFO <<
"Transformation applied to input variables : \"" 522 <<
" transformations applied in transformation chain; cuts applied on transformed quantities ] " <<
Endl;
525 Log() <<
kINFO <<
"Transformation applied to input variables : None" <<
Endl;
527 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
529 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
531 <<
"Cut[" << std::setw(2) << ivar <<
"]: " 532 << std::setw(10) << cutsMin[ivar]
534 << std::setw(maxL) << (*varVec)[ivar]
536 << std::setw(10) << cutsMax[ivar] <<
Endl;
538 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
550 std::vector<Double_t> cMin(
GetNvar() );
551 std::vector<Double_t> cMax(
GetNvar() );
554 cutMin[ivar] = cMin[ivar];
555 cutMax[ivar] = cMax[ivar];
564 std::vector<Double_t>& cutMin,
565 std::vector<Double_t>& cutMax )
const 574 if (ibin < 0 ) ibin = 0;
580 cutMin.push_back(
fCutMin[ivar][ibin] );
581 cutMax.push_back(
fCutMax[ivar][ibin] );
632 std::vector<TH1F*> signalDist, bkgDist;
650 std::vector<Interval*> ranges;
707 for (
UInt_t ivar=0; ivar<ranges.size(); ivar++)
delete ranges[ivar];
718 Int_t nsamples =
Int_t(0.5*nevents*(nevents - 1));
723 for (
Int_t ievt1=0; ievt1<nevents; ievt1++) {
724 for (
Int_t ievt2=ievt1+1; ievt2<nevents; ievt2++) {
739 Int_t nsamples = 200000;
741 DeclareOptionRef( nsamples,
"SampleSize",
"Number of Monte-Carlo-Event samples" );
742 DeclareOptionRef( seed,
"Seed",
"Seed for the random generator (0 takes random seeds)" );
755 Log() <<
kINFO <<
"Running Monte-Carlo-Event sampling over " << nsamples <<
" events" <<
Endl;
756 std::vector<Double_t> pars( 2*
GetNvar() );
758 for (
Int_t itoy=0; itoy<nsamples; itoy++) {
777 evt1 = ev1->GetValue( ivar );
783 if (nbreak++ > 10000)
Log() <<
kFATAL <<
"<MCEvents>: could not find signal events" 784 <<
" after 10000 trials - do you have signal events in your sample ?" 790 if (evt1 > evt2) {
Double_t z = evt1; evt1 = evt2; evt2 =
z; }
792 pars[2*ivar+1] = evt2 - evt1;
847 if (!
DataInfo().IsSignal(ev1))
return -1;
850 if (!
DataInfo().IsSignal(ev2))
return -1;
856 for (
Int_t ivar=0; ivar<nvar; ivar++) {
862 std::vector<Double_t> pars;
863 for (
Int_t ivar=0; ivar<nvar; ivar++) {
866 if (evt1[ivar] < evt2[ivar]) {
875 pars.push_back( cutMin );
876 pars.push_back( cutMax - cutMin );
936 Double_t average = 0.5*(effBH_left + effBH_right);
937 if (effBH < effB) average = effBH;
941 eta = ( -
TMath::Abs(effBH-average) + (1.0 - (effBH - effB))) / (1.0 + effS);
948 if (effBH < 0 || effBH > effB) {
970 penalty+=4.*diff*diff;
973 if (effS<1.
e-4)
return 10.0+penalty;
974 else return 10.*(1.-10.*effS);
987 cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
988 cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
998 if (ibin < 1 || ibin >
fNbins)
Log() <<
kFATAL <<
"::MatchCutsToPars: bin error: " 1004 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
1005 cutMin[ivar] = cutMinAll[ivar][ibin-1];
1006 cutMax[ivar] = cutMaxAll[ivar][ibin-1];
1021 Int_t ipar = 2*ivar;
1022 pars[ipar] = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
1023 pars[ipar+1] = cutMax[ivar] - cutMin[ivar];
1037 effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
1038 effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
1044 if( !
fNegEffWarning )
Log() <<
kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1049 if( !
fNegEffWarning )
Log() <<
kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1077 if (nTotS == 0 && nTotB == 0) {
1078 Log() <<
kFATAL <<
"<GetEffsfromSelection> fatal error in zero total number of events:" 1079 <<
" nTotS, nTotB: " << nTotS <<
" " << nTotB <<
" ***" <<
Endl;
1086 Log() <<
kWARNING <<
"<ComputeEstimator> zero number of signal events" <<
Endl;
1088 else if (nTotB == 0) {
1091 Log() <<
kWARNING <<
"<ComputeEstimator> zero number of background events" <<
Endl;
1101 if( !
fNegEffWarning )
Log() <<
kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1106 if( !
fNegEffWarning )
Log() <<
kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1132 if( val > minVal ) minVal = val;
1133 if( val < maxVal ) maxVal = val;
1139 TString histTitle = (*fInputVars)[ivar] +
" signal training";
1140 TString histName = (*fInputVars)[ivar] +
"_sig";
1151 (*fVarHistS)[ivar] =
new TH1F(histName.
Data(), histTitle.
Data(),
fNbins, minVal, maxVal );
1154 histTitle = (*fInputVars)[ivar] +
" background training";
1155 histName = (*fInputVars)[ivar] +
"_bgd";
1166 (*fVarHistB)[ivar] =
new TH1F(histName.
Data(), histTitle.Data(),
fNbins, minVal, maxVal );
1172 (*fVarHistS)[ivar]->Fill( val );
1174 (*fVarHistB)[ivar]->Fill( val );
1181 (*fVarHistS_smooth)[ivar] = (
TH1F*)(*
fVarHistS)[ivar]->Clone();
1182 histTitle = (*fInputVars)[ivar] +
" signal training smoothed ";
1183 histTitle += nsmooth;
1184 histTitle +=
" times";
1185 histName = (*fInputVars)[ivar] +
"_sig_smooth";
1186 (*fVarHistS_smooth)[ivar]->SetName(histName);
1187 (*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
1190 (*fVarHistS_smooth)[ivar]->Smooth(nsmooth);
1205 (*fVarHistB_smooth)[ivar] = (
TH1F*)(*fVarHistB)[ivar]->
Clone();
1206 histTitle = (*fInputVars)[ivar]+
" background training smoothed ";
1207 histTitle += nsmooth;
1208 histTitle +=
" times";
1209 histName = (*fInputVars)[ivar]+
"_bgd_smooth";
1210 (*fVarHistB_smooth)[ivar]->SetName(histName);
1211 (*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
1214 (*fVarHistB_smooth)[ivar]->Smooth(nsmooth);
1231 istr >> dummy >>
dummy;
1236 istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >>
dummy ;
1240 Log() <<
kFATAL <<
"<ReadWeightsFromStream> fatal error: mismatch " 1247 Log() <<
kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1250 Log() <<
kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1253 Log() <<
kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1256 Log() <<
kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1259 Log() <<
kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1264 Log() <<
kINFO <<
"in " << fNbins <<
" signal efficiency bins and for " <<
GetNvar() <<
" variables" <<
Endl;
1268 istr.getline(buffer,200);
1269 istr.getline(buffer,200);
1275 TString(
GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1279 istr >> tmpbin >> tmpeffS >> tmpeffB;
1298 std::vector<Double_t> cutsMin;
1299 std::vector<Double_t> cutsMax;
1305 gTools().
AddComment( wght,
Form(
"Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]",
GetNvar() ) );
1345 Int_t tmpEffMethod, tmpFitMethod;
1346 gTools().
ReadAttr( wghtnode,
"OptimisationMethod", tmpEffMethod );
1355 Log() <<
kINFO <<
"Read cuts optimised using sample of MC events" <<
Endl;
1358 Log() <<
kINFO <<
"Read cuts optimised using sample of MC-Event events" <<
Endl;
1361 Log() <<
kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1364 Log() <<
kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1367 Log() <<
kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1402 if (tmpbin-1 >=
fNbins || tmpbin-1 < 0) {
1428 (*fVarHistS)[ivar]->Write();
1429 (*fVarHistB)[ivar]->Write();
1430 (*fVarHistS_smooth)[ivar]->Write();
1431 (*fVarHistB_smooth)[ivar]->Write();
1432 (*fVarPdfS)[ivar]->GetPDFHist()->Write();
1433 (*fVarPdfB)[ivar]->GetPDFHist()->Write();
1455 Log() <<
kFATAL <<
"<GetTrainingEfficiency> wrong number of arguments" 1456 <<
" in string: " << theString
1457 <<
" | required format, e.g., Efficiency:0.05" <<
Endl;
1470 if (results->
GetHist(
"EFF_BVSS_TR")==0) {
1490 results->
Store(eff_bvss_tr,
"EFF_BVSS_TR");
1491 results->
Store(rej_bvss_tr,
"REJ_BVSS_TR");
1498 Int_t nFailedBins=0;
1501 tmpCutMin[ivar] =
fCutMin[ivar][bini-1];
1502 tmpCutMax[ivar] =
fCutMax[ivar][bini-1];
1509 if (effBin != bini){
1510 Log()<<
kVERBOSE <<
"unable to fill efficiency bin " << bini<<
" " << effBin <<
Endl;
1519 if (nFailedBins>0)
Log()<<
kWARNING <<
" unable to fill "<< nFailedBins <<
" efficiency bins " <<
Endl;
1521 delete [] tmpCutMin;
1522 delete [] tmpCutMax;
1532 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
1533 Int_t nbins_ = 1000;
1536 for (
Int_t bini=1; bini<=nbins_; bini++) {
1538 effS = (bini - 0.5)/
Float_t(nbins_);
1542 if ((effB - effBref)*(effB_ - effBref) < 0)
break;
1547 return 0.5*(effS + effS_);
1572 Log() <<
kFATAL <<
"<GetEfficiency> wrong number of arguments" 1573 <<
" in string: " << theString
1574 <<
" | required format, e.g., Efficiency:0.05, or empty string" <<
Endl;
1589 if (results->
GetHist(
"MVA_EFF_BvsS")==0) {
1613 results->
Store(eff_BvsS,
"MVA_EFF_BvsS");
1614 results->
Store(rej_BvsS);
1623 results->
Store(eff_s,
"MVA_S");
1624 results->
Store(eff_b,
"MVA_B");
1636 tmpCutMin[ivar] =
fCutMin[ivar][bini-1];
1637 tmpCutMax[ivar] =
fCutMax[ivar][bini-1];
1642 tmpBvsS->
SetPoint(bini, effS, effB);
1649 delete [] tmpCutMin;
1650 delete [] tmpCutMax;
1666 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
1667 Int_t nbins_ = 1000;
1673 for (
Int_t bini=1; bini<=nbins_; bini++) {
1676 effS = (bini - 0.5)/
Float_t(nbins_);
1678 integral += (1.0 - effB);
1687 for (
Int_t bini=1; bini<=nbins_; bini++) {
1689 effS = (bini - 0.5)/
Float_t(nbins_);
1693 if ((effB - effBref)*(effB_ - effBref) < 0)
break;
1698 effS = 0.5*(effS + effS_);
1715 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
1716 fout <<
"};" << std::endl;
1734 Log() <<
"The optimisation of rectangular cuts performed by TMVA maximises " <<
Endl;
1735 Log() <<
"the background rejection at given signal efficiency, and scans " <<
Endl;
1736 Log() <<
"over the full range of the latter quantity. Three optimisation" <<
Endl;
1737 Log() <<
"methods are optional: Monte Carlo sampling (MC), a Genetics" <<
Endl;
1738 Log() <<
"Algorithm (GA), and Simulated Annealing (SA). GA and SA are" <<
Endl;
1739 Log() <<
"expected to perform best." <<
Endl;
1741 Log() <<
"The difficulty to find the optimal cuts strongly increases with" <<
Endl;
1742 Log() <<
"the dimensionality (number of input variables) of the problem." <<
Endl;
1743 Log() <<
"This behavior is due to the non-uniqueness of the solution space."<<
Endl;
1747 Log() <<
"If the dimensionality exceeds, say, 4 input variables, it is " <<
Endl;
1748 Log() <<
"advisable to scrutinize the separation power of the variables," <<
Endl;
1749 Log() <<
"and to remove the weakest ones. If some among the input variables" <<
Endl;
1750 Log() <<
"can be described by a single cut (e.g., because signal tends to be" <<
Endl;
1751 Log() <<
"larger than background), this can be indicated to MethodCuts via" <<
Endl;
1752 Log() <<
"the \"Fsmart\" options (see option string). Choosing this option" <<
Endl;
1753 Log() <<
"reduces the number of requirements for the variable from 2 (min/max)" <<
Endl;
1754 Log() <<
"to a single one (TMVA finds out whether it is to be interpreted as" <<
Endl;
1755 Log() <<
"min or max)." <<
Endl;
1759 Log() << bold <<
"Monte Carlo sampling:" << resbold <<
Endl;
1761 Log() <<
"Apart form the \"Fsmart\" option for the variables, the only way" <<
Endl;
1762 Log() <<
"to improve the MC sampling is to increase the sampling rate. This" <<
Endl;
1763 Log() <<
"is done via the configuration option \"MC_NRandCuts\". The execution" <<
Endl;
1764 Log() <<
"time scales linearly with the sampling rate." <<
Endl;
1766 Log() << bold <<
"Genetic Algorithm:" << resbold <<
Endl;
1768 Log() <<
"The algorithm terminates if no significant fitness increase has" <<
Endl;
1769 Log() <<
"been achieved within the last \"nsteps\" steps of the calculation." <<
Endl;
1770 Log() <<
"Wiggles in the ROC curve or constant background rejection of 1" <<
Endl;
1771 Log() <<
"indicate that the GA failed to always converge at the true maximum" <<
Endl;
1772 Log() <<
"fitness. In such a case, it is recommended to broaden the search " <<
Endl;
1773 Log() <<
"by increasing the population size (\"popSize\") and to give the GA " <<
Endl;
1774 Log() <<
"more time to find improvements by increasing the number of steps" <<
Endl;
1775 Log() <<
"(\"nsteps\")" <<
Endl;
1776 Log() <<
" -> increase \"popSize\" (at least >10 * number of variables)" <<
Endl;
1777 Log() <<
" -> increase \"nsteps\"" <<
Endl;
1779 Log() << bold <<
"Simulated Annealing (SA) algorithm:" << resbold <<
Endl;
1781 Log() <<
"\"Increasing Adaptive\" approach:" <<
Endl;
1783 Log() <<
"The algorithm seeks local minima and explores their neighborhood, while" <<
Endl;
1784 Log() <<
"changing the ambient temperature depending on the number of failures" <<
Endl;
1785 Log() <<
"in the previous steps. The performance can be improved by increasing" <<
Endl;
1786 Log() <<
"the number of iteration steps (\"MaxCalls\"), or by adjusting the" <<
Endl;
1787 Log() <<
"minimal temperature (\"MinTemperature\"). Manual adjustments of the" <<
Endl;
1788 Log() <<
"speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" <<
Endl;
1789 Log() <<
"to individual data sets should also help. Summary:" << brk <<
Endl;
1790 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1791 Log() <<
" -> adjust \"MinTemperature\"" << brk <<
Endl;
1792 Log() <<
" -> adjust \"TemperatureScale\"" << brk <<
Endl;
1793 Log() <<
" -> adjust \"AdaptiveSpeed\"" <<
Endl;
1795 Log() <<
"\"Decreasing Adaptive\" approach:" <<
Endl;
1797 Log() <<
"The algorithm calculates the initial temperature (based on the effect-" <<
Endl;
1798 Log() <<
"iveness of large steps) and the multiplier that ensures to reach the" <<
Endl;
1799 Log() <<
"minimal temperature with the requested number of iteration steps." <<
Endl;
1800 Log() <<
"The performance can be improved by adjusting the minimal temperature" <<
Endl;
1801 Log() <<
" (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" << brk <<
Endl;
1802 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1803 Log() <<
" -> adjust \"MinTemperature\"" <<
Endl;
1805 Log() <<
"Other kernels:" <<
Endl;
1807 Log() <<
"Alternative ways of counting the temperature change are implemented. " <<
Endl;
1808 Log() <<
"Each of them starts with the maximum temperature (\"MaxTemperature\")" <<
Endl;
1809 Log() <<
"and descreases while changing the temperature according to a given" <<
Endl;
1810 Log() <<
"prescription:" << brk <<
Endl;
1811 Log() <<
"CurrentTemperature =" << brk <<
Endl;
1812 Log() <<
" - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" << brk <<
Endl;
1813 Log() <<
" - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" << brk <<
Endl;
1814 Log() <<
" - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" << brk <<
Endl;
1815 Log() <<
" - Sin: (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" << brk <<
Endl;
1816 Log() <<
" - Geo: CurrentTemperature * TemperatureScale" <<
Endl;
1818 Log() <<
"Their performance can be improved by adjusting initial temperature" <<
Endl;
1819 Log() <<
"(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," <<
Endl;
1820 Log() <<
"and the multiplier that scales the termperature descrease" <<
Endl;
1821 Log() <<
"(\"TemperatureScale\")" << brk <<
Endl;
1822 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1823 Log() <<
" -> adjust \"InitialTemperature\"" << brk <<
Endl;
1824 Log() <<
" -> adjust \"TemperatureScale\"" << brk <<
Endl;
1825 Log() <<
" -> adjust \"KernelTemperature\"" <<
Endl;
std::vector< Double_t > * fRmsS
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
void GetHelpMessage() const
get help message text
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
MsgLogger & Endl(MsgLogger &ml)
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA; there are two requirements: 1) the signal efficiency m...
void TestClassification()
nothing to test
Collectable string class.
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
MethodCuts(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="MC:150:10000:")
standard constructor
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
void Clone(Ssiz_t nc)
Make self a distinct copy with capacity of at least tot, where tot cannot be smaller than the current...
Double_t Fill(const std::vector< TMVA::Event *> &events, const std::vector< Int_t > &theVars, Int_t theType=-1)
create the search tree from the event collection using ONLY the variables specified in "theVars" ...
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
std::vector< TH1 * > * fVarHistS
1-D histogram with a float per channel (see TH1 documentation)
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
Short_t Min(Short_t a, Short_t b)
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
std::vector< PDF * > * fVarPdfS
Double_t GetSumOfWeights(void) const
return the sum of event (node) weights
std::vector< TH1 * > * fVarHistS_smooth
const TString & GetInputVar(Int_t i) const
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
std::vector< Double_t > * fMeanB
Double_t Run()
estimator function interface for fitting
void SetIPythonInteractive(bool *ExitFromTraining, UInt_t *fIPyMaxIter_, UInt_t *fIPyCurrentIter_)
std::vector< EFitParameters > * fFitParams
void Init(void)
default initialisation called by all constructors
const Event * GetEvent() const
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
BinarySearchTree * fBinaryTreeS
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample ...
virtual void ParseOptions()
options parser
DataSetInfo & DataInfo() const
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
std::vector< TH1 * > * fVarHistB_smooth
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
std::vector< PDF * > * fVarPdfB
void Train(void)
training method: here the cuts are optimised for the training sample
Float_t Max(Types::ESBType sb, UInt_t var)
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
std::vector< Int_t > * fRangeSign
Float_t Min(Types::ESBType sb, UInt_t var)
const char * GetName() const
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see the convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
char * Form(const char *fmt,...)
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
const TString & GetMethodName() const
virtual Double_t Eval(Double_t x) const =0
TSpline * fSplTrainEffBvsS
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
void DeclareOptions()
define the options (their keywords) that can be set in the option string; known options: Method <strin...
virtual const char * GetPath() const
Returns the full path of the directory.
void SetNormalised(Bool_t norm)
UInt_t GetNVariables() const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
Bool_t IgnoreEventsWithNegWeightsInTraining() const
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
EFitMethodType fFitMethod
std::vector< Double_t > * fMeanS
Bool_t WriteOptionsReference() const
Float_t RMS(Types::ESBType sb, UInt_t var)
Bool_t IsNormalised() const
TH1 * GetHist(const TString &alias) const
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
static RooMathCoreReg dummy
void SetCurrentType(Types::ETreeType type) const
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
VariableInfo & GetVariableInfo(Int_t i)
void AddPreDefVal(const T &)
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
std::vector< Double_t > * fRmsB
const TString & GetOptions() const
Float_t Mean(Types::ESBType sb, UInt_t var)
you should not use this method at all Int_t Int_t z
#define REGISTER_METHOD(CLASS)
for example
virtual void SetPoint(Int_t i, Double_t x, Double_t y)
Set x and y values for point number i.
virtual ~MethodCuts(void)
destructor
IPythonInteractive * fInteractive
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
TString GetMethodTypeName() const
Short_t Max(Short_t a, Short_t b)
A Graph is a graphics object made of two arrays X and Y with npoints each.
static const Double_t fgMaxAbsCutVal
void ProcessOptions()
process user options; sanity check: do not allow the input variables to be normalised, because this only creates problems when interpreting the cuts
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Bool_t IsSignal(const Event *ev) const
void PrintCuts(Double_t effS) const
print cuts
Types::EAnalysisType GetAnalysisType() const
void Store(TObject *obj, const char *alias=0)
Double_t Sqrt(Double_t x)
Double_t SearchVolume(Volume *, std::vector< const TMVA::BinarySearchTreeNode *> *events=0)
search the whole tree and add up all weights of events that lie within the given volume ...
const TString & GetTestvarName() const
virtual Int_t GetSize() const
Double_t GetTrainingEfficiency(const TString &)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
std::vector< Interval * > fCutRange
void CheckForUnusedOptions() const
checks for unused options in option string
BinarySearchTree * fBinaryTreeB
std::vector< TH1 * > * fVarHistB
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const char * Data() const