: fLearningModel     ( kFull )
, fImportanceCut     ( 0 )
, fLinQuantile       ( 0.025 )
, fAverageSupport    ( 0.8 )
, fAverageRuleSigma  ( 0.4 )
, fRuleMinDist       ( 1e-3 )
, fNRulesGenerated   ( 0 )
, fEventCacheOK      ( true )
, fRuleMapEvents     ( 0 )
for ( std::vector<Rule *>::iterator itrRule = fRules.begin(); itrRule != fRules.end(); ++itrRule ) {
for (UInt_t i=0; i<nvars; i++) {
if (ncoeffs<1) return 0;

for (Int_t i=0; i<ncoeffs; i++) {
   val = fRules[i]->GetCoefficient();
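// A minimal sketch of how the loop above plausibly completes into
// CoefficientRadius(), i.e. sqrt(Sum_i a_i^2) over the rule coefficients
// with the offset a0 deliberately excluded; the accumulator name 'sum2'
// is ours, not from this file.
Double_t sum2 = 0;
for (Int_t i=0; i<ncoeffs; i++) {
   Double_t val = fRules[i]->GetCoefficient();
   sum2 += val*val;            // accumulate a_i^2
}
return TMath::Sqrt(sum2);      // sqrt(Sum a_i^2), a0 not included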
for (UInt_t i=0; i<nrules; i++) {
   fRules[i]->SetCoefficient(0.0);
if (v.size()!=nrules) {
   Log() << kFATAL << "<SetCoefficients> - BUG TRAP - input vector wrong size! It is = " << v.size()
         << " when it should be = " << nrules << Endl;
}
for (UInt_t i=0; i<nrules; i++) {
   fRules[i]->SetCoefficient(v[i]);
if (nrules==0) return;

for (UInt_t i=0; i<nrules; i++) {
   v[i] = fRules[i]->GetCoefficient();
std::vector< Char_t > removeMe( nrulesIn, false );

for (UInt_t i=0; i<nrulesIn; i++) {
   for (UInt_t k=i+1; k<nrulesIn; k++) {
      remind = (r>0.5 ? k : i);
      if (!removeMe[remind]) {
         removeMe[remind] = true;

for (UInt_t i=0; i<nrulesIn; i++) {

Log() << kVERBOSE << "Removed " << nrulesIn - nrulesOut << " out of " << nrulesIn << " rules" << Endl;
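// Hedged sketch of the test feeding 'remind' above: pairs of rules are
// compared with Rule::Equal() against the distance threshold fRuleMinDist,
// and on a match one of the pair is flagged at random; r = gRandom->Rndm(),
// so r>0.5 drops rule k, otherwise rule i. Surrounding bookkeeping elided.
if (fRules[i]->Equal( *fRules[k], kTRUE, fRuleMinDist )) {
   Double_t r = gRandom->Rndm();
   UInt_t remind = (r>0.5 ? k : i);            // victim chosen at random
   if (!removeMe[remind]) removeMe[remind] = true;
}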
if (nrules==0) return;

for (UInt_t i=0; i<nrules; i++) {

Log() << kINFO << "Removed " << nrules-ind << " out of a total of " << nrules
for (UInt_t i=0; i<nlin; i++) {
if ((nrules>0) && (events->size()>0)) {
   for ( std::vector< Rule * >::iterator itrRule=fRules.begin(); itrRule!=fRules.end(); ++itrRule ) {
      for ( std::vector<const Event * >::const_iterator itrEvent=events->begin(); itrEvent!=events->end(); ++itrEvent ) {
         if ((*itrRule)->EvalEvent( *(*itrEvent) )) {
            ew = (*itrEvent)->GetWeight();

      t = (t<0 ? 0 : sqrt(t));

      (*itrRule)->SetSupport(s);
      (*itrRule)->SetNorm(t);
      (*itrRule)->SetSSB( ssb );
      (*itrRule)->SetSSBNeve( Double_t(ssig+sbkg) );
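// What the setters above receive, as a sketch (weight bookkeeping elided):
// the support s is the weighted fraction of events that fire the rule, the
// norm t is the binomial spread sqrt(s*(1-s)) (hence the t<0 guard), and
// ssb is the weighted signal fraction among the firing events:
//    s   = sumFiringWeight / sumTotalWeight;
//    t   = s*(1.0 - s);            // then t = (t<0 ? 0 : sqrt(t))
//    ssb = ssig / (ssig + sbkg);   // ssig/sbkg: firing signal/bkg weight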
Double_t maxImp = (maxRuleImp>maxLinImp ? maxRuleImp : maxLinImp);

fRules[i]->SetImportanceRef(impref);
for ( Int_t i=0; i<nrules; i++ ) {
   fRules[i]->CalcImportance();
   imp = fRules[i]->GetImportance();
   if (imp>maxImp) maxImp = imp;
}
for ( Int_t i=0; i<nrules; i++ ) {
   fRules[i]->SetImportanceRef(maxImp);
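// Hedged note on Rule::CalcImportance(): following the RuleFit paper, a
// rule's importance combines its fitted coefficient with the spread of its
// support,
//    I_k = |a_k| * sqrt( s_k * (1 - s_k) ),
// where sqrt(s(1-s)) is exactly the norm cached by CalcRuleSupport() above;
// the second loop then stores the maximum as the common reference scale.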
for ( UInt_t i=0; i<nvars; i++ ) {
   if (imp>maxImp) maxImp = imp;
for ( UInt_t ind=0; ind<nrules; ind++ ) {
   rimp      = fRules[ind]->GetImportance();
   nvarsUsed = fRules[ind]->GetNumVarsUsed();
   if (nvarsUsed<1)
      Log() << kFATAL << "<CalcVarImportance> Variables for importance calc!!!??? A BUG!" << Endl;
   rimpN = (nvarsUsed > 0 ? rimp/nvarsUsed : 0.0);
   for ( UInt_t iv=0; iv<nvars; iv++ ) {
      if (fRules[ind]->ContainsVariable(iv)) {

for ( UInt_t iv=0; iv<nvars; iv++ ) {

for ( UInt_t iv=0; iv<nvars; iv++ ) {
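// The loop above follows eq (35) of Friedman & Popescu: a rule's importance
// is split evenly over the nvarsUsed variables it cuts on
// (rimpN = rimp/nvarsUsed), so the elided branch body presumably reads
//    fVarImportance[iv] += rimpN;
// giving the total per-variable importance
//    J_v = I_v(linear term) + Sum_{rules k using v} I_k / m_k .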
fRules.resize(rules.size());
UInt_t ntrees = forest.size();
for ( UInt_t ind=0; ind<ntrees; ind++ ) {
   nendn = (nrules/2) + 1;
   sumn2 += nendn*nendn;
   nrulesCheck += nrules;
}
Double_t nmean = (ntrees>0) ? sumnendn/ntrees : 0;
Double_t ndev  = 2.0*(nmean-2.0-nsigm)/(nmean-2.0+nsigm);

Log() << kVERBOSE << "Average number of end nodes per tree   = " << nmean << Endl;
if (ntrees>1) Log() << kVERBOSE << "sigma of ditto ( ~= mean-2 ?)          = " << nsigm << Endl;
Log() << kVERBOSE << "Deviation from exponential model       = " << ndev  << Endl;
Log() << kVERBOSE << "Corresponds to L (eq. 13, RuleFit ppr) = " << nmean << Endl;
if (nrulesCheck != static_cast<Int_t>(fRules.size())) {
   Log() << kFATAL
         << "BUG! number of generated and possible rules do not match! N(rules) = " << fRules.size()
         << " != " << nrulesCheck << Endl;
UInt_t neve  = events->size();
UInt_t nvars = ((*events)[0])->GetNVariables();

typedef std::pair< Double_t, Int_t >    dataType;   // (event weight, class)
typedef std::pair< Double_t, dataType > dataPoint;  // (variable value, (weight, class))

std::vector< std::vector<dataPoint> > vardata(nvars);
std::vector< Double_t > varsum (nvars, 0.0);
std::vector< Double_t > varsum2(nvars, 0.0);
for (UInt_t i=0; i<neve; i++) {
   ew  = ((*events)[i])->GetWeight();
   val = ((*events)[i])->GetValue(v);
   vardata[v].push_back( dataPoint( val, dataType(ew, ((*events)[i])->GetClass()) ) );
std::sort( vardata[v].begin(), vardata[v].end() );

// scan up from the bottom until the effective weight reaches the quantile
while ( (ie<neve) && (neff<nquant) ) {
   neff += vardata[v][ie].second.first;
   ie++;
}
indquantM = (ie==0 ? 0 : ie-1);

// scan down from the top for the complementary quantile
while ( (ie>0) && (neff<nquant) ) {
   ie--;
   neff += vardata[v][ie].second.first;
}
indquantP = (ie==neve ? neve-1 : ie);

fLinDM[v] = vardata[v][indquantM].first;   // value at the lower quantile
fLinDP[v] = vardata[v][indquantP].first;   // value at the upper quantile
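// With the quantile values fLinDM[v] and fLinDP[v] in hand, the linear term
// of eq (25) in the RuleFit paper is the winsorised variable: values beyond
// the beta and (1-beta) quantiles (beta = fLinQuantile) are clipped before
// use. A minimal sketch; the helper name is ours, not from this file:
inline Double_t WinsorisedTerm( Double_t x, Double_t dM, Double_t dP )
{
   return TMath::Min( dP, TMath::Max( dM, x ) );   // l(x) = min(dP, max(dM, x))
}
// e.g. lx = WinsorisedTerm( val, fLinDM[v], fLinDP[v] );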
for (ie=0; ie<neve; ie++) {
   val  = vardata[v][ie].first;
   ew   = vardata[v][ie].second.first;
   type = vardata[v][ie].second.second;

   varsum2[v] += ew*lx*lx;
fstot += fLinPDFS[v]->GetBinContent(bin);
fbtot += fLinPDFB[v]->GetBinContent(bin);

if (nvars<1) return 0;

ntot = (fstot+fbtot)/Double_t(nvars);

return fstot/(fstot+fbtot);
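// Note: fstot/(fstot+fbtot) is the summed signal PDF weight over the summed
// signal+background PDF weight, i.e. this function's estimate of
// Pr(y = 1 | x) from the linear terms; ntot returns the per-variable
// average of the total to the caller.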
for (UInt_t ir=0; ir<nrules; ir++) {

if (ntot>0) return nsig/ntot;

if ((nlt>0) && (nrt>0)) nt = 2.0;
const UInt_t neve = events->size();
const Event *eveData;

std::vector<Int_t> varcnt;

varcnt.resize(nvars,0);
for (UInt_t i=0; i<nrules; i++ ) {
   if (fRules[i]->ContainsVariable(v)) varcnt[v]++;
   sigRule = fRules[i]->IsSignalRule();

   eveData = (*events)[e];
   tagged  = fRules[i]->EvalEvent(*eveData);
   sigTag  = (tagged && sigRule);
   bkgTag  = (tagged && (!sigRule));
   sigTrue = (eveData->GetClass() == 0);

   if (sigTag && sigTrue)  nss++;
   if (sigTag && !sigTrue) nsb++;
   if (bkgTag && sigTrue)  nbs++;
   if (bkgTag && !sigTrue) nbb++;
if (ntag>0 && neve>0) {

fRuleFSig = (nsig>0) ? static_cast<Double_t>(nsig)/static_cast<Double_t>(nsig+nbkg) : 0;
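// The four counters above form a per-rule confusion table over the sample:
//
//                        event is signal   event is background
//    signal rule fires        nss                 nsb
//    bkg rule fires           nbs                 nbb
//
// The per-rule fractions (fRulePSS, fRulePSB, fRulePBS, fRulePBB) and the
// tagging fraction fRulePTag are presumably derived from these in the
// elided lines; fRuleFSig above is the overall signal-tag fraction.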
for (UInt_t i=0; i<nrules; i++ ) {
Log() << kHEADER << "-------------------RULE ENSEMBLE SUMMARY------------------------" << Endl;

if (mrf) Log() << kINFO << "Tree training method : " << (mrf->UseBoost() ? "AdaBoost" : "Random") << Endl;

Log() << kINFO << "----------------------------------------------------------------" << Endl;
Log() << kmtype << "================================================================" << Endl;
Log() << kmtype << " M o d e l " << Endl;
Log() << kmtype << "================================================================" << Endl;

<< std::resetiosflags(std::ios::right)
Log() << kmtype << "------------------------------------" << Endl;
Log() << kmtype << "Linear model (weights unnormalised)" << Endl;
Log() << kmtype << "------------------------------------" << Endl;
Log() << kmtype << std::setw(maxL) << "Variable"
      << std::resetiosflags(std::ios::right) << " : "
      << std::setw(11) << " Weights"
      << std::resetiosflags(std::ios::right) << " : "
      << std::resetiosflags(std::ios::right)

Log() << kmtype << "------------------------------------" << Endl;
<< std::resetiosflags(std::ios::right)

Log() << kmtype << "-> importance below threshold = "
Log() << kmtype << "------------------------------------" << Endl;

else Log() << kmtype << "Linear terms were disabled" << Endl;
if ((!DoRules()) || (nrules==0)) {

   Log() << kmtype << "Rule terms were disabled" << Endl;

Log() << kmtype << "Even though rules were included in the model, none passed! " << nrules << Endl;

Log() << kmtype << "Number of rules = " << nrules << Endl;

Log() << kmtype << "Fraction of rules containing a variable (%):" << Endl;
std::list< std::pair<double,int> > sortedImp;
for (Int_t i=0; i<nrules; i++) {
   sortedImp.push_back( std::pair<double,int>( fRules[i]->GetImportance(), i ) );
}

Log() << kmtype << "Printing the first " << printN << " rules, ordered in importance." << Endl;

for ( std::list< std::pair<double,int> >::reverse_iterator itpair = sortedImp.rbegin();
      itpair != sortedImp.rend(); ++itpair ) {
   ind = itpair->second;

   fRules[ind]->PrintLogger( Form("Rule %4d : ", pind+1) );

   if (nrules==printN) {
      Log() << kmtype << "All rules printed" << Endl;

   Log() << kmtype << "Skipping the next " << nrules-printN << " rules" << Endl;

Log() << kmtype << "================================================================" << Endl;
Int_t dp = os.precision();

os << "Offset= " << fOffset << std::endl;
os << "NRules= " << nrules << std::endl;
for (UInt_t i=0; i<nrules; i++){
   os << "***Rule " << i << std::endl;

os << "NLinear= " << fLinTermOK.size() << std::endl;
for (UInt_t i=0; i<nlinear; i++) {
   os << "***Linear " << i << std::endl;
   os << std::setprecision(10) << (fLinTermOK[i] ? 1:0) << " "

os << std::setprecision(dp);
for (UInt_t i=0; i<nlinear; i++) {
Int_t iLearningModel;
for (i=0; i<nrules; i++) {
   fRules[i]->SetRuleEnsemble( this );
   fRules[i]->ReadFromXML( ch );
}

fLinDP.resize( nlinear );
fLinDM.resize( nlinear );
istr >> dummy >> nrules;

for (UInt_t i=0; i<nrules; i++){
   istr >> dummy >> idum;
   (fRules.back())->SetRuleEnsemble( this );

istr >> dummy >> nlinear;

fLinDP.resize( nlinear );
fLinDM.resize( nlinear );

for (UInt_t i=0; i<nlinear; i++) {
   istr >> dummy >> idum;
if (this != &other) {
if (dtree==0) return 0;

Int_t nendnodes = 0;

return 2*(nendnodes-1);
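// Why 2*(nendnodes-1): a binary tree with n leaves has n-1 internal nodes,
// and each internal node contributes two rules (its left and its right
// branch condition). This is the inverse of the relation used in
// MakeRules() above, nendn = (nrules/2) + 1.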
if (node==0) return;

if (node==0) return;
fRules.push_back( rule );

Log() << kFATAL << "<AddRule> - ERROR failed in creating a rule! BUG!" << Endl;

Log() << kFATAL << "<MakeTheRule> Input node is NULL. Should not happen. BUG!" << Endl;
std::vector< const Node * > nodeVec;
const Node *parent = node;

nodeVec.push_back( node );

if (!parent) continue;

nodeVec.insert( nodeVec.begin(), parent );

if (nodeVec.size()<2) {
   Log() << kFATAL << "<MakeTheRule> BUG! Inconsistent Rule!" << Endl;

Rule *rule = new Rule( this, nodeVec );
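// Hedged sketch of the elided climb above: starting from 'node', walk up
// via GetParent() and prepend each ancestor, so that nodeVec ends up
// ordered root -> ... -> node; the Rule is then the conjunction of the
// cuts along that path. A paraphrase, not a verbatim copy of the source:
const Node *p = node->GetParent();
while (p) {
   nodeVec.insert( nodeVec.begin(), p );   // prepend: keep the root first
   p = p->GetParent();
}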
if ((ifirst==0) || (ilast==0) || (ifirst>ilast)) {
   ilast = events->size()-1;

std::vector<UInt_t> ruleind;

for (UInt_t i=ifirst; i<=ilast; i++) {

Log() << kVERBOSE << "Made rule map for event# " << ifirst << " : " << ilast << Endl;
os << "DON'T USE THIS - TO BE REMOVED" << std::endl;