77 , fGDPathStep ( 0.01 )
78 , fGDNPathSteps ( 1000 )
103 if (fNTCoeff) {
delete fNTCoeff; fNTCoeff = 0; }
104 if (fNTLinCoeff) {
delete fNTLinCoeff;fNTLinCoeff = 0; }
113 if (fRuleFit==0)
return;
114 if (fRuleFit->GetMethodRuleFit()==0) {
115 Log() <<
kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" <<
Endl;
117 UInt_t neve = fRuleFit->GetTrainingEvents().size();
119 fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
120 fNRules = fRuleEnsemble->GetNRules();
121 fNLinear = fRuleEnsemble->GetNLinear();
130 fPerfIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
135 ofs = neve - fPerfIdx2 - 1;
145 fPathIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
154 for (
UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
155 fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
159 for (
UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
160 fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
163 Log() <<
kVERBOSE <<
"Path constr. - event index range = [ " << fPathIdx1 <<
", " << fPathIdx2 <<
" ]"
164 <<
", effective N(events) = " << fNEveEffPath <<
Endl;
165 Log() <<
kVERBOSE <<
"Error estim. - event index range = [ " << fPerfIdx1 <<
", " << fPerfIdx2 <<
" ]"
166 <<
", effective N(events) = " << fNEveEffPerf <<
Endl;
168 if (fRuleEnsemble->DoRules())
169 Log() <<
kDEBUG <<
"Number of rules in ensemble: " << fNRules <<
Endl;
173 if (fRuleEnsemble->DoLinear())
174 Log() <<
kDEBUG <<
"Number of linear terms: " << fNLinear <<
Endl;
184 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
185 fGDNtuple->Branch(
"risk", &fNTRisk,
"risk/D");
186 fGDNtuple->Branch(
"error", &fNTErrorRate,
"error/D");
187 fGDNtuple->Branch(
"nuval", &fNTNuval,
"nuval/D");
188 fGDNtuple->Branch(
"coefrad", &fNTCoefRad,
"coefrad/D");
189 fGDNtuple->Branch(
"offset", &fNTOffset,
"offset/D");
191 fNTCoeff = (fNRules >0 ?
new Double_t[fNRules] : 0);
192 fNTLinCoeff = (fNLinear>0 ?
new Double_t[fNLinear] : 0);
194 for (
UInt_t i=0; i<fNRules; i++) {
195 fGDNtuple->Branch(
Form(
"a%d",i+1),&fNTCoeff[i],
Form(
"a%d/D",i+1));
197 for (
UInt_t i=0; i<fNLinear; i++) {
198 fGDNtuple->Branch(
Form(
"b%d",i+1),&fNTLinCoeff[i],
Form(
"b%d/D",i+1));
206 std::vector<Double_t> &avsel,
207 std::vector<Double_t> &avrul )
209 UInt_t neve = ind2-ind1+1;
211 Log() <<
kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" <<
Endl;
217 if (fNLinear>0) avsel.resize(fNLinear,0);
218 if (fNRules>0) avrul.resize(fNRules,0);
219 const std::vector<UInt_t> *eventRuleMap=0;
225 if (fRuleEnsemble->IsRuleMapOK()) {
226 for (
UInt_t i=ind1; i<ind2+1; i++) {
227 ew = fRuleFit->GetTrainingEventWeight(i);
229 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
230 avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
234 if (fRuleEnsemble->DoRules()) {
235 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
236 nrules = (*eventRuleMap).size();
239 avrul[(*eventRuleMap)[
r]] += ew;
244 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
245 for (
UInt_t i=ind1; i<ind2+1; i++) {
246 ew = fRuleFit->GetTrainingEventWeight(i);
249 fRuleEnsemble->EvalLinEvent(*((*events)[i]));
250 fRuleEnsemble->EvalEvent(*((*events)[i]));
252 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
253 avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
257 avrul[
r] += ew*fRuleEnsemble->GetEventRuleVal(
r);
262 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
263 avsel[sel] = avsel[sel] / sumew;
267 avrul[
r] = avrul[
r] / sumew;
278 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
290 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
292 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
301 Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
303 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
305 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
313 UInt_t neve = ind2-ind1+1;
315 Log() <<
kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
320 for (
UInt_t i=ind1; i<ind2+1; i++) {
321 rval += LossFunction(i);
333 UInt_t neve = ind2-ind1+1;
335 Log() <<
kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
340 for (
UInt_t i=ind1; i<ind2+1; i++) {
341 rval += LossFunction(i,itau);
355 Log() <<
kWARNING <<
"<Penalty> Using unverified code! Check!" <<
Endl;
357 const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
358 for (
UInt_t i=0; i<fNRules; i++) {
359 rval +=
TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
361 for (
UInt_t i=0; i<fNLinear; i++) {
388 fGDTauVec.resize( fGDNTau );
390 fGDTauVec[0] = fGDTau;
394 Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
395 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
396 fGDTauVec[itau] =
static_cast<Double_t>(itau)*dtau + fGDTauMin;
397 if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
405 fGradVecLinTst.clear();
410 fGDCoefLinTst.clear();
414 fGDCoefTst.resize(fGDNTau);
415 fGradVec.resize(fNRules,0);
416 fGradVecTst.resize(fGDNTau);
417 for (
UInt_t i=0; i<fGDNTau; i++) {
418 fGradVecTst[i].resize(fNRules,0);
419 fGDCoefTst[i].resize(fNRules,0);
424 fGDCoefLinTst.resize(fGDNTau);
425 fGradVecLin.resize(fNLinear,0);
426 fGradVecLinTst.resize(fGDNTau);
427 for (
UInt_t i=0; i<fGDNTau; i++) {
428 fGradVecLinTst[i].resize(fNLinear,0);
429 fGDCoefLinTst[i].resize(fNLinear,0);
434 fGDErrTst.resize(fGDNTau,0);
435 fGDErrTstOK.resize(fGDNTau,
kTRUE);
436 fGDOfsTst.resize(fGDNTau,0);
437 fGDNTauTstOK = fGDNTau;
448 if (fGDNTau<2)
return 0;
449 if (fGDTauScan==0)
return 0;
451 if (fGDOfsTst.size()<1)
452 Log() <<
kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." <<
Endl;
454 Log() <<
kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." <<
Endl;
457 UInt_t nscan = fGDTauScan;
477 MakeTstGradientVector();
479 UpdateTstCoefficients();
483 if ( (ip==0) || ((ip+1)%netst==0) ) {
485 itauMin = RiskPerfTst();
487 <<
" => error rate = " << fGDErrTst[itauMin] <<
Endl;
490 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
500 Log() <<
kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." <<
Endl;
502 fGDTau = fGDTauVec[itauMin];
503 fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
504 fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
505 fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
506 Log() <<
kINFO <<
"Best path found with tau = " <<
Form(
"%4.4f",fGDTau)
536 Log() <<
kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found"
538 Log() <<
kVERBOSE <<
"Number of events used per path step = " << fPathIdx2-fPathIdx1+1 <<
Endl;
539 Log() <<
kVERBOSE <<
"Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 <<
Endl;
549 EvaluateAveragePath();
550 EvaluateAveragePerf();
558 Log() <<
kVERBOSE <<
" tau range = [ " << fGDTauVec[0] <<
" , " << fGDTauVec[fGDNTau-1] <<
" ]" <<
Endl;
561 if (isDebug) InitNtuple();
573 std::vector<Double_t> coefsMin;
574 std::vector<Double_t> lincoefsMin;
589 std::vector<Double_t> valx;
590 std::vector<Double_t> valy;
591 std::vector<Double_t> valxy;
601 int imod = fGDNPathSteps/100;
602 if (imod<100) imod =
std::min(100,fGDNPathSteps);
603 if (imod>100) imod=100;
606 fAverageTruth = -CalcAverageTruth();
607 offsetMin = fAverageTruth;
608 fRuleEnsemble->SetOffset(offsetMin);
609 fRuleEnsemble->ClearCoefficients(0);
610 fRuleEnsemble->ClearLinCoefficients(0);
611 for (
UInt_t i=0; i<fGDOfsTst.size(); i++) {
612 fGDOfsTst[i] = offsetMin;
614 Log() <<
kVERBOSE <<
"Obtained initial offset = " << offsetMin <<
Endl;
617 Int_t nprescan = FindGDTau();
632 Int_t stopCondition=0;
639 if (isVerbose) t0 = clock();
640 MakeGradientVector();
642 tgradvec =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
643 stgradvec += tgradvec;
647 if (isVerbose) t0 = clock();
648 UpdateCoefficients();
650 tupgrade =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
651 stupgrade += tupgrade;
655 docheck = ((iloop==0) ||((iloop+1)%imod==0));
661 fNTNuval =
Double_t(iloop)*fGDPathStep;
666 if (isDebug) FillCoefficients();
667 fNTCoefRad = fRuleEnsemble->CoefficientRadius();
671 fNTRisk = RiskPath();
672 trisk =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
679 if (fNTRisk>=rprev) {
683 riskFlat=(nbadrisk>3);
687 Log() <<
kWARNING <<
"This may be OK if minimum is already found" <<
Endl;
697 if (isVerbose) t0 = clock();
707 fNTErrorRate = errroc;
710 tperf =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
717 if (fNTErrorRate<=errmin) {
718 errmin = fNTErrorRate;
721 fRuleEnsemble->GetCoefficients(coefsMin);
722 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
723 offsetMin = fRuleEnsemble->GetOffset();
725 if ( fNTErrorRate > fGDErrScale*errmin) found =
kTRUE;
729 if (valx.size()==npreg) {
730 valx.erase(valx.begin());
731 valy.erase(valy.begin());
732 valxy.erase(valxy.begin());
734 valx.push_back(fNTNuval);
735 valy.push_back(fNTErrorRate);
736 valxy.push_back(fNTErrorRate*fNTNuval);
741 if (isDebug) fGDNtuple->Fill();
745 <<
Form(
"%8d",iloop+1) <<
" "
746 <<
Form(
"%4.4f",fNTRisk) <<
" "
747 <<
Form(
"%4.4f",riskPerf) <<
" "
748 <<
Form(
"%4.4f",fNTRisk+riskPerf) <<
" "
764 Bool_t endOfLoop = (iloop==fGDNPathSteps);
765 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
769 else if (endOfLoop) {
773 Log() <<
kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" <<
Endl;
774 errmin = fNTErrorRate;
777 fRuleEnsemble->GetCoefficients(coefsMin);
778 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
779 offsetMin = fRuleEnsemble->GetOffset();
786 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
787 Log() <<
kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin <<
Endl;
788 Log() <<
kINFO <<
"Reason for ending loop: ";
789 switch (stopCondition) {
791 Log() <<
kINFO <<
"clear minima found";
794 Log() <<
kINFO <<
"chaotic behaviour of risk";
797 Log() <<
kINFO <<
"end of loop reached";
804 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
809 Log() <<
kWARNING <<
"Check results and maybe decrease GDStep size" <<
Endl;
819 Log() <<
kINFO <<
"The error rate was still decreasing at the end of the path" <<
Endl;
820 Log() <<
kINFO <<
"Increase number of steps (GDNSteps)." <<
Endl;
826 fRuleEnsemble->SetCoefficients( coefsMin );
827 fRuleEnsemble->SetLinCoefficients( lincoefsMin );
828 fRuleEnsemble->SetOffset( offsetMin );
831 Log() <<
kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" <<
Endl;
838 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
856 if (isDebug) fGDNtuple->
Write();
864 fNTOffset = fRuleEnsemble->GetOffset();
866 for (
UInt_t i=0; i<fNRules; i++) {
867 fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
869 for (
UInt_t i=0; i<fNLinear; i++) {
870 fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
882 Log() <<
kWARNING <<
"<CalcFStar> Using unverified code! Check!" <<
Endl;
883 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
885 Log() <<
kFATAL <<
"<CalcFStar> Invalid start/end indices!" <<
Endl;
889 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
892 std::vector<Double_t> fstarSorted;
895 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
896 const Event& e = *(*events)[i];
897 fstarVal = fRuleEnsemble->FStar(e);
898 fFstar.push_back(fstarVal);
899 fstarSorted.push_back(fstarVal);
903 std::sort( fstarSorted.begin(), fstarSorted.end() );
906 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
909 fFstarMedian = fstarSorted[ind];
923 Log() <<
kWARNING <<
"<Optimism> Using unverified code! Check!" <<
Endl;
924 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
926 Log() <<
kFATAL <<
"<Optimism> Invalid start/end indices!" <<
Endl;
929 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
940 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
941 const Event& e = *(*events)[i];
942 yhat = fRuleEnsemble->EvalEvent(i);
943 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0);
944 w = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;
947 sumyhaty += w*yhat*
y;
952 Double_t cov = sumyhaty - sumyhat*sumy;
965 Log() <<
kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" <<
Endl;
966 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
968 Log() <<
kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" <<
Endl;
970 if (fFstar.size()!=neve) {
971 Log() <<
kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
972 <<
" Fstar.size() = " << fFstar.size() <<
" , N(events) = " << neve <<
Endl;
977 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
986 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
987 const Event& e = *(*events)[i];
988 sF = fRuleEnsemble->EvalEvent( e );
990 sumdf +=
TMath::Abs(fFstar[i-fPerfIdx1] - sF);
991 sumdfmed +=
TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
996 return sumdf/sumdfmed;
1010 Log() <<
kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" <<
Endl;
1011 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1013 Log() <<
kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" <<
Endl;
1016 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1023 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1024 const Event& e = *(*events)[i];
1025 sF = fRuleEnsemble->EvalEvent( e );
1027 signF = (sF>0 ? +1:-1);
1029 signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
1040 std::vector<Double_t> & sFbkg )
1048 std::sort(sFsig.begin(), sFsig.end());
1049 std::sort(sFbkg.begin(), sFbkg.end());
1050 const Double_t minsig = sFsig.front();
1051 const Double_t minbkg = sFbkg.front();
1052 const Double_t maxsig = sFsig.back();
1053 const Double_t maxbkg = sFbkg.back();
1059 const Double_t df = (maxf-minf)/(np-1);
1064 std::vector<Double_t>::const_iterator indit;
1081 indit = std::find_if( sFsig.begin(), sFsig.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
1082 nesig = sFsig.end()-indit;
1085 indit = std::find_if( sFbkg.begin(), sFbkg.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
1086 nrbkg = indit-sFbkg.begin();
1098 area += 0.5*(1+rejb)*effs;
1113 Log() <<
kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" <<
Endl;
1114 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1116 Log() <<
kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" <<
Endl;
1119 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1123 std::vector<Double_t> sFsig;
1124 std::vector<Double_t> sFbkg;
1130 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1131 const Event& e = *(*events)[i];
1132 sF = fRuleEnsemble->EvalEvent(i);
1133 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
1134 sFsig.push_back(sF);
1139 sFbkg.push_back(sF);
1144 fsigave = sumfsig/sFsig.size();
1145 fbkgave = sumfbkg/sFbkg.size();
1149 return ErrorRateRocRaw( sFsig, sFbkg );
1163 Log() <<
kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" <<
Endl;
1164 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1166 Log() <<
kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1170 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1174 std::vector< std::vector<Double_t> > sFsig;
1175 std::vector< std::vector<Double_t> > sFbkg;
1177 sFsig.resize( fGDNTau );
1178 sFbkg.resize( fGDNTau );
1181 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1182 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1185 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1186 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
1187 sFsig[itau].push_back(sF);
1190 sFbkg[itau].push_back(sF);
1196 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1197 err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
1198 fGDErrTst[itau] = err;
1211 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1213 Log() <<
kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1223 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1224 if (fGDErrTstOK[itau]) {
1226 fGDErrTst[itau] = RiskPerf(itau);
1227 sumx += fGDErrTst[itau];
1228 sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
1229 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1230 if (fGDErrTst[itau]<minx) {
1231 minx=fGDErrTst[itau];
1241 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1242 if (fGDErrTstOK[itau]) {
1243 if (fGDErrTst[itau] > maxacc) {
1244 fGDErrTstOK[itau] =
kFALSE;
1269 UInt_t neve = fPathIdx1-fPathIdx2+1;
1271 Log() <<
kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" <<
Endl;
1277 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1280 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1281 if (fGDErrTstOK[itau]) {
1282 for (
UInt_t ir=0; ir<fNRules; ir++) {
1283 fGradVecTst[itau][ir]=0;
1285 for (
UInt_t il=0; il<fNLinear; il++) {
1286 fGradVecLinTst[itau][il]=0;
1295 const std::vector<UInt_t> *eventRuleMap=0;
1301 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1302 const Event *e = (*events)[i];
1304 if (fRuleEnsemble->DoRules()) {
1305 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1306 nrules = (*eventRuleMap).size();
1308 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1311 if (fGDErrTstOK[itau]) {
1312 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1316 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1317 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1319 for (
UInt_t ir=0; ir<nrules; ir++) {
1320 rind = (*eventRuleMap)[ir];
1321 fGradVecTst[itau][rind] +=
r;
1324 for (
UInt_t il=0; il<fNLinear; il++) {
1325 fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i,
kTRUE );
1342 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1343 if (fGDErrTstOK[itau]) {
1345 maxr = ( (fNRules>0 ?
1346 TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(),
AbsValue()))):0) );
1347 maxl = ( (fNLinear>0 ?
1348 TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(),
AbsValue()))):0) );
1351 Double_t maxv = (maxr>maxl ? maxr:maxl);
1352 cthresh = maxv * fGDTauVec[itau];
1362 for (
UInt_t i=0; i<fNRules; i++) {
1363 val = fGradVecTst[itau][i];
1365 if (TMath::Abs(val)>=cthresh) {
1366 fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
1370 for (
UInt_t i=0; i<fNLinear; i++) {
1371 val = fGradVecLinTst[itau][i];
1372 if (TMath::Abs(val)>=cthresh) {
1373 fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
1380 CalcTstAverageResponse();
1393 UInt_t neve = fPathIdx2-fPathIdx1+1;
1395 Log() <<
kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" <<
Endl;
1401 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1404 for (
UInt_t ir=0; ir<fNRules; ir++) {
1407 for (
UInt_t il=0; il<fNLinear; il++) {
1415 const std::vector<UInt_t> *eventRuleMap=0;
1420 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1421 const Event *e = (*events)[i];
1424 sF = fRuleEnsemble->EvalEvent( i );
1428 if (fRuleEnsemble->DoRules()) {
1429 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1430 nrules = (*eventRuleMap).size();
1432 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1433 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1435 for (
UInt_t ir=0; ir<nrules; ir++) {
1436 rind = (*eventRuleMap)[ir];
1437 fGradVec[rind] +=
r;
1442 for (
UInt_t il=0; il<fNLinear; il++) {
1443 fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i,
kTRUE );
1457 Double_t maxr = ( (fRuleEnsemble->DoRules() ?
1459 Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
1460 TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(),
AbsValue()))):0) );
1462 Double_t maxv = (maxr>maxl ? maxr:maxl);
1470 useRThresh = cthresh;
1471 useLThresh = cthresh;
1479 for (
UInt_t i=0; i<fGradVec.size(); i++) {
1482 coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
1483 fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
1488 for (
UInt_t i=0; i<fGradVecLin.size(); i++) {
1489 lval = fGradVecLin[i];
1491 lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
1492 fRuleEnsemble->SetLinCoefficient(i,lcoef);
1497 fRuleEnsemble->SetOffset( offset );
1507 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1508 if (fGDErrTstOK[itau]) {
1509 fGDOfsTst[itau] = 0;
1510 for (
UInt_t s=0; s<fNLinear; s++) {
1511 fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
1514 fGDOfsTst[itau] -= fGDCoefTst[itau][
r] * fAverageRulePath[
r];
1529 for (
UInt_t s=0; s<fNLinear; s++) {
1530 ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
1533 ofs -= fRuleEnsemble->GetRules(
r)->GetCoefficient() * fAverageRulePath[
r];
1543 if (fPathIdx2<=fPathIdx1) {
1544 Log() <<
kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" <<
Endl;
1550 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1551 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1552 Double_t ew = fRuleFit->GetTrainingEventWeight(i);
1553 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
1555 sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
1557 Log() <<
kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg <<
Endl;
1559 return sum/fNEveEffPath;
1565 return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
1572 fLogger->SetMinType(t);
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
MsgLogger & Endl(MsgLogger &ml)
void MakeGradientVector()
make gradient vector
void FillCoefficients()
helper function to store the rule coefficients in local arrays
Double_t Penalty() const
This is the "lasso" penalty. To be used for regression.
Short_t Min(Short_t a, Short_t b)
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
#define rprev(otri1, otri2)
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
Double_t CalcAverageTruth()
calculate the average truth
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
void SetMsgType(EMsgType t)
TString GetElapsedTime(Bool_t Scientific=kTRUE)
Int_t Type(const Event *e) const
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment...
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t Optimism()
implementation of eq.
char * Form(const char *fmt,...)
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classi...
void InitNtuple()
initializes the ntuple
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
virtual ~RuleFitParams()
destructor
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
RuleFitParams()
constructor
Short_t Max(Short_t a, Short_t b)
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependency on EvaluateAverage() ! ...
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
void InitGD()
Initialize GD path search.
A TTree object has a header with a name and a title.
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
Double_t Sqrt(Double_t x)
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
void MakeGDPath()
The following finds the gradient directed path in parameter space.
double norm(double *x, double *p)
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...