77 , fGDPathStep ( 0.01 )
78 , fGDNPathSteps ( 1000 )
115 Log() <<
kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" <<
Endl;
163 Log() <<
kVERBOSE <<
"Path constr. - event index range = [ " <<
fPathIdx1 <<
", " << fPathIdx2 <<
" ]" 165 Log() <<
kVERBOSE <<
"Error estim. - event index range = [ " <<
fPerfIdx1 <<
", " << fPerfIdx2 <<
" ]" 184 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
206 std::vector<Double_t> &avsel,
207 std::vector<Double_t> &avrul )
209 UInt_t neve = ind2-ind1+1;
211 Log() <<
kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" <<
Endl;
219 const std::vector<UInt_t> *eventRuleMap=0;
226 for (
UInt_t i=ind1; i<ind2+1; i++) {
236 nrules = (*eventRuleMap).size();
239 avrul[(*eventRuleMap)[
r]] += ew;
245 for (
UInt_t i=ind1; i<ind2+1; i++) {
263 avsel[sel] = avsel[sel] / sumew;
267 avrul[
r] = avrul[
r] / sumew;
313 UInt_t neve = ind2-ind1+1;
315 Log() <<
kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
320 for (
UInt_t i=ind1; i<ind2+1; i++) {
333 UInt_t neve = ind2-ind1+1;
335 Log() <<
kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
340 for (
UInt_t i=ind1; i<ind2+1; i++) {
355 Log() <<
kWARNING <<
"<Penalty> Using unverified code! Check!" <<
Endl;
452 Log() <<
kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." <<
Endl;
454 Log() <<
kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." <<
Endl;
483 if ( (ip==0) || ((ip+1)%netst==0) ) {
490 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
500 Log() <<
kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." <<
Endl;
536 Log() <<
kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found" 573 std::vector<Double_t> coefsMin;
574 std::vector<Double_t> lincoefsMin;
589 std::vector<Double_t> valx;
590 std::vector<Double_t> valy;
591 std::vector<Double_t> valxy;
603 if (imod>100) imod=100;
614 Log() <<
kVERBOSE <<
"Obtained initial offset = " << offsetMin <<
Endl;
632 Int_t stopCondition=0;
640 if (isVerbose) t0 = clock();
643 tgradvec =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
644 stgradvec += tgradvec;
648 if (isVerbose) t0 = clock();
651 tupgrade =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
652 stupgrade += tupgrade;
656 docheck = ((iloop==0) ||((iloop+1)%imod==0));
673 trisk =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
683 Log() <<
"Risk(i+1)>=Risk(i) in path" <<
Endl;
684 riskFlat=(nbadrisk>3);
687 Log() <<
"--- STOPPING MINIMISATION ---" <<
Endl;
688 Log() <<
"This may be OK if minimum is already found" <<
Endl;
698 if (isVerbose) t0 = clock();
711 tperf =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
730 if (valx.size()==npreg) {
731 valx.erase(valx.begin());
732 valy.erase(valy.begin());
733 valxy.erase(valxy.begin());
746 <<
Form(
"%8d",iloop+1) <<
" " 748 <<
Form(
"%4.4f",riskPerf) <<
" " 766 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
770 else if (endOfLoop) {
774 Log() <<
kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" <<
Endl;
788 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
789 Log() <<
kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin <<
Endl;
790 Log() <<
kINFO <<
"Reason for ending loop: ";
791 switch (stopCondition) {
793 Log() <<
kINFO <<
"clear minima found";
796 Log() <<
kINFO <<
"chaotic behaviour of risk";
799 Log() <<
kINFO <<
"end of loop reached";
806 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
811 Log() <<
"Check results and maybe decrease GDStep size" <<
Endl;
821 Log() <<
kINFO <<
"The error rate was still decreasing at the end of the path" <<
Endl;
822 Log() <<
kINFO <<
"Increase number of steps (GDNSteps)." <<
Endl;
833 Log() <<
kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" <<
Endl;
840 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
884 Log() <<
kWARNING <<
"<CalcFStar> Using unverified code! Check!" <<
Endl;
887 Log() <<
kFATAL <<
"<CalcFStar> Invalid start/end indices!" <<
Endl;
894 std::vector<Double_t> fstarSorted;
898 const Event&
e = *(*events)[i];
900 fFstar.push_back(fstarVal);
901 fstarSorted.push_back(fstarVal);
905 std::sort( fstarSorted.begin(), fstarSorted.end() );
908 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
925 Log() <<
kWARNING <<
"<Optimism> Using unverified code! Check!" <<
Endl;
928 Log() <<
kFATAL <<
"<Optimism> Invalid start/end indices!" <<
Endl;
943 const Event&
e = *(*events)[i];
949 sumyhaty += w*yhat*
y;
954 Double_t cov = sumyhaty - sumyhat*sumy;
967 Log() <<
kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" <<
Endl;
970 Log() <<
kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" <<
Endl;
972 if (
fFstar.size()!=neve) {
973 Log() <<
kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!" 974 <<
" Fstar.size() = " <<
fFstar.size() <<
" , N(events) = " << neve <<
Endl;
989 const Event&
e = *(*events)[i];
998 return sumdf/sumdfmed;
1012 Log() <<
kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" <<
Endl;
1015 Log() <<
kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" <<
Endl;
1026 const Event&
e = *(*events)[i];
1029 signF = (sF>0 ? +1:-1);
1042 std::vector<Double_t> & sFbkg )
1050 std::sort(sFsig.begin(), sFsig.end());
1051 std::sort(sFbkg.begin(), sFbkg.end());
1052 const Double_t minsig = sFsig.front();
1053 const Double_t minbkg = sFbkg.front();
1054 const Double_t maxsig = sFsig.back();
1055 const Double_t maxbkg = sFbkg.back();
1056 const Double_t minf = std::min(minsig,minbkg);
1057 const Double_t maxf = std::max(maxsig,maxbkg);
1060 const Int_t np = std::min((nsig+nbkg)/4,50);
1061 const Double_t df = (maxf-minf)/(np-1);
1066 std::vector<Double_t>::const_iterator indit;
1081 for (
Int_t i=0; i<np; i++) {
1083 indit = std::find_if( sFsig.begin(), sFsig.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
1084 nesig = sFsig.end()-indit;
1087 indit = std::find_if( sFbkg.begin(), sFbkg.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
1088 nrbkg = indit-sFbkg.begin();
1100 area += 0.5*(1+rejb)*effs;
1115 Log() <<
kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" <<
Endl;
1118 Log() <<
kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" <<
Endl;
1125 std::vector<Double_t> sFsig;
1126 std::vector<Double_t> sFbkg;
1133 const Event&
e = *(*events)[i];
1136 sFsig.push_back(sF);
1141 sFbkg.push_back(sF);
1146 fsigave = sumfsig/sFsig.size();
1147 fbkgave = sumfbkg/sFbkg.size();
1165 Log() <<
kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" <<
Endl;
1168 Log() <<
kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1176 std::vector< std::vector<Double_t> > sFsig;
1177 std::vector< std::vector<Double_t> > sFbkg;
1189 sFsig[itau].push_back(sF);
1192 sFbkg[itau].push_back(sF);
1215 Log() <<
kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1231 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1232 if (fGDErrTst[itau]<minx) {
1233 minx=fGDErrTst[itau];
1273 Log() <<
kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" <<
Endl;
1297 const std::vector<UInt_t> *eventRuleMap=0;
1304 const Event *
e = (*events)[i];
1308 nrules = (*eventRuleMap).size();
1321 for (
UInt_t ir=0; ir<nrules; ir++) {
1322 rind = (*eventRuleMap)[ir];
1353 Double_t maxv = (maxr>maxl ? maxr:maxl);
1367 if (TMath::Abs(val)>=cthresh) {
1374 if (TMath::Abs(val)>=cthresh) {
1397 Log() <<
kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" <<
Endl;
1417 const std::vector<UInt_t> *eventRuleMap=0;
1423 const Event *
e = (*events)[i];
1432 nrules = (*eventRuleMap).size();
1437 for (
UInt_t ir=0; ir<nrules; ir++) {
1438 rind = (*eventRuleMap)[ir];
1464 Double_t maxv = (maxr>maxl ? maxr:maxl);
1472 useRThresh = cthresh;
1473 useLThresh = cthresh;
1546 Log() <<
kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" <<
Endl;
1559 Log() <<
kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg <<
Endl;
static long int sum(long int i)
MsgLogger & Endl(MsgLogger &ml)
std::vector< std::vector< Double_t > > fGDCoefLinTst
std::vector< Double_t > fAverageSelectorPath
const std::vector< Double_t > & GetLinNorm() const
const std::vector< const TMVA::Event *> & GetTrainingEvents() const
Double_t EvalLinEventRaw(UInt_t vind, const Event &e, Bool_t norm) const
void MakeGradientVector()
make gradient vector
UInt_t GetNLinear() const
void EvaluateAveragePerf()
virtual Int_t Fill()
Fill all branches.
void FillCoefficients()
helper function to store the rule coefficients in local arrays
EMsgType GetMinType() const
const std::vector< UInt_t > & GetEventRuleMap(UInt_t evtidx) const
Double_t RiskPath() const
const std::vector< TMVA::Rule * > & GetRulesConst() const
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
std::vector< std::vector< Double_t > > fGradVecTst
void EvaluateAveragePath()
std::vector< Double_t > fAverageRulePath
Double_t GetGDValidEveFrac() const
Short_t Min(Short_t a, Short_t b)
std::vector< Double_t > fGDTauVec
std::vector< Double_t > fFstar
TString GetElapsedTime(Bool_t Scientific=kTRUE)
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
const std::vector< Double_t > & GetLinCoefficients() const
Double_t GetEventLinearValNorm(UInt_t i) const
void SetLinCoefficients(const std::vector< Double_t > &v)
void MakeTstGradientVector()
make test gradient vector for all tau; same algorithm as MakeGradientVector()
#define rprev(otri1, otri2)
std::vector< std::vector< Double_t > > fGDCoefTst
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
Double_t GetEventRuleVal(UInt_t i) const
MsgLogger & Log() const
message logger
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t FStar() const
We want to estimate F* = argmin Eyx( L(y,F(x) ), min wrt F(x) F(x) = FL(x) + FR(x) ...
Double_t CalcAverageTruth()
calculate the average truth
DataSetInfo & DataInfo() const
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
void SetMinType(EMsgType minType)
RuleEnsemble * GetRuleEnsemblePtr()
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
void SetMsgType(EMsgType t)
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classi...
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Double_t GetGDPathEveFrac() const
std::vector< Char_t > fGDErrTstOK
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment...
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
std::vector< Double_t > fGDOfsTst
void SetOffset(Double_t v=0.0)
void ClearCoefficients(Double_t val=0)
const TMVA::Event * GetRuleMapEvent(UInt_t evtidx) const
Double_t Optimism()
implementation of eq.
char * Form(const char *fmt,...)
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
void ClearLinCoefficients(Double_t val=0)
Bool_t IsRuleMapOK() const
void SetLinCoefficient(UInt_t i, Double_t v)
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
void InitNtuple()
initializes the ntuple
Double_t GetTrainingEventWeight(UInt_t i) const
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
virtual ~RuleFitParams()
destructor
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
std::vector< std::vector< Double_t > > fGradVecLinTst
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
RuleFitParams()
constructor
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
std::vector< Double_t > fGradVecLin
Double_t GetOffset() const
std::vector< Double_t > fGradVec
Double_t EvalLinEvent() const
Short_t Max(Short_t a, Short_t b)
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependency on EvaluateAverage() ! ...
RuleEnsemble * fRuleEnsemble
std::vector< TMVA::Rule * > & GetRules()
Bool_t IsSignal(const Event *ev) const
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
void InitGD()
Initialize GD path search.
A TTree object has a header with a name and a title.
Double_t EvalEvent() const
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
const MethodRuleFit * GetMethodRuleFit() const
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
std::vector< Double_t > fGDErrTst
Double_t Sqrt(Double_t x)
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
void MakeGDPath()
The following finds the gradient directed path in parameter space.
double norm(double *x, double *p)
Double_t RiskPerf() const
Int_t Type(const Event *e) const
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...