#include <algorithm>
#include <list>
#include "Riostream.h"
#include "TRandom3.h"
#include "TMath.h"
#include "TMatrix.h"
#include "TDirectory.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/GiniIndex.h"
#include "TMVA/CrossEntropy.h"
#include "TMVA/SdivSqrtSplusB.h"
#include "TMVA/SeparationBase.h"
#include "TMVA/MisClassificationError.h"
#include "TMVA/MethodRuleFit.h"
#include "TMVA/RuleFitAPI.h"
#include "TMVA/Tools.h"
#include "TMVA/Timer.h"
#include "TMVA/Ranking.h"
#include "TMVA/Config.h"
#include "TMVA/MsgLogger.h"
REGISTER_METHOD(RuleFit)
ClassImp(TMVA::MethodRuleFit)
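/// Standard constructor.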
TMVA::MethodRuleFit::MethodRuleFit( const TString& jobName,
const TString& methodTitle,
DataSetInfo& theData,
const TString& theOption,
TDirectory* theTargetDir ) :
MethodBase( jobName, Types::kRuleFit, methodTitle, theData, theOption, theTargetDir )
, fSignalFraction(0)
, fNTImportance(0)
, fNTCoefficient(0)
, fNTSupport(0)
, fNTNcuts(0)
, fNTNvars(0)
, fNTPtag(0)
, fNTPss(0)
, fNTPsb(0)
, fNTPbs(0)
, fNTPbb(0)
, fNTSSB(0)
, fNTType(0)
, fUseRuleFitJF(kFALSE)
, fRFNrules(0)
, fRFNendnodes(0)
, fNTrees(0)
, fTreeEveFrac(0)
, fMinFracNEve(0)
, fMaxFracNEve(0)
, fNCuts(0)
, fPruneMethod(TMVA::DecisionTree::kCostComplexityPruning)
, fPruneStrength(0)
, fUseBoost(kFALSE)
, fGDPathEveFrac(0)
, fGDValidEveFrac(0)
, fGDTau(0)
, fGDTauPrec(0)
, fGDTauMin(0)
, fGDTauMax(0)
, fGDTauScan(0)
, fGDPathStep(0)
, fGDNPathSteps(0)
, fGDErrScale(0)
, fMinimp(0)
, fRuleMinDist(0)
, fLinQuantile(0)
{
}
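/// Constructor used when reading the method back from a weight file.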
TMVA::MethodRuleFit::MethodRuleFit( DataSetInfo& theData,
const TString& theWeightFile,
TDirectory* theTargetDir ) :
MethodBase( Types::kRuleFit, theData, theWeightFile, theTargetDir )
, fSignalFraction(0)
, fNTImportance(0)
, fNTCoefficient(0)
, fNTSupport(0)
, fNTNcuts(0)
, fNTNvars(0)
, fNTPtag(0)
, fNTPss(0)
, fNTPsb(0)
, fNTPbs(0)
, fNTPbb(0)
, fNTSSB(0)
, fNTType(0)
, fUseRuleFitJF(kFALSE)
, fRFNrules(0)
, fRFNendnodes(0)
, fNTrees(0)
, fTreeEveFrac(0)
, fMinFracNEve(0)
, fMaxFracNEve(0)
, fNCuts(0)
, fPruneMethod(TMVA::DecisionTree::kCostComplexityPruning)
, fPruneStrength(0)
, fUseBoost(kFALSE)
, fGDPathEveFrac(0)
, fGDValidEveFrac(0)
, fGDTau(0)
, fGDTauPrec(0)
, fGDTauMin(0)
, fGDTauMax(0)
, fGDTauScan(0)
, fGDPathStep(0)
, fGDNPathSteps(0)
, fGDErrScale(0)
, fMinimp(0)
, fRuleMinDist(0)
, fLinQuantile(0)
{
}
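/// Destructor: delete the locally owned copies of the training events and the decision-tree forest.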
TMVA::MethodRuleFit::~MethodRuleFit( void )
{
for (UInt_t i=0; i<fEventSample.size(); i++) delete fEventSample[i];
for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];
}
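/// RuleFit handles only two-class classification.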
Bool_t TMVA::MethodRuleFit::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t )
{
if (type == Types::kClassification && numberClasses == 2) return kTRUE;
return kFALSE;
}
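/// Declare the configuration options of the method: gradient-directed path
/// parameters, forest generation, rule selection, and the RuleFit module to use.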
void TMVA::MethodRuleFit::DeclareOptions()
{
DeclareOptionRef(fGDTau=-1, "GDTau", "Gradient-directed (GD) path: default fit cut-off");
DeclareOptionRef(fGDTauPrec=0.01, "GDTauPrec", "GD path: precision of tau");
DeclareOptionRef(fGDPathStep=0.01, "GDStep", "GD path: step size");
DeclareOptionRef(fGDNPathSteps=10000, "GDNSteps", "GD path: number of steps");
DeclareOptionRef(fGDErrScale=1.1, "GDErrScale", "Stop scan when error > scale*errmin");
DeclareOptionRef(fLinQuantile, "LinQuantile", "Quantile of linear terms (removes outliers)");
DeclareOptionRef(fGDPathEveFrac=0.5, "GDPathEveFrac", "Fraction of events used for the path search");
DeclareOptionRef(fGDValidEveFrac=0.5, "GDValidEveFrac", "Fraction of events used for the validation");
DeclareOptionRef(fMinFracNEve=0.1, "fEventsMin", "Minimum fraction of events in a splittable node");
DeclareOptionRef(fMaxFracNEve=0.9, "fEventsMax", "Maximum fraction of events in a splittable node");
DeclareOptionRef(fNTrees=20, "nTrees", "Number of trees in forest.");
DeclareOptionRef(fForestTypeS="AdaBoost", "ForestType", "Method to use for forest generation (AdaBoost or Random)");
AddPreDefVal(TString("AdaBoost"));
AddPreDefVal(TString("Random"));
DeclareOptionRef(fRuleMinDist=0.001, "RuleMinDist", "Minimum distance between rules");
DeclareOptionRef(fMinimp=0.01, "MinImp", "Minimum rule importance accepted");
DeclareOptionRef(fModelTypeS="ModRuleLinear", "Model", "Model to be used");
AddPreDefVal(TString("ModRule"));
AddPreDefVal(TString("ModRuleLinear"));
AddPreDefVal(TString("ModLinear"));
DeclareOptionRef(fRuleFitModuleS="RFTMVA", "RuleFitModule","Which RuleFit module to use");
AddPreDefVal(TString("RFTMVA"));
AddPreDefVal(TString("RFFriedman"));
DeclareOptionRef(fRFWorkDir="./rulefit", "RFWorkDir", "Friedman's RuleFit module (RFF): working dir");
DeclareOptionRef(fRFNrules=2000, "RFNrules", "RFF: Maximum number of rules");
DeclareOptionRef(fRFNendnodes=4, "RFNendnodes", "RFF: Average number of end nodes");
}
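/// Process the user-supplied options: choose the RuleFit module, separation
/// criterion, model type, pruning and forest settings, verify the option ranges,
/// and forward the fit parameters to the RuleFit core.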
void TMVA::MethodRuleFit::ProcessOptions()
{
if (IgnoreEventsWithNegWeightsInTraining()) {
Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
<< GetMethodTypeName()
<< " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
<< Endl;
}
fRuleFitModuleS.ToLower();
if (fRuleFitModuleS == "rftmva") fUseRuleFitJF = kFALSE;
else if (fRuleFitModuleS == "rffriedman") fUseRuleFitJF = kTRUE;
else fUseRuleFitJF = kTRUE;
fSepTypeS.ToLower();
if (fSepTypeS == "misclassificationerror") fSepType = new MisClassificationError();
else if (fSepTypeS == "giniindex") fSepType = new GiniIndex();
else if (fSepTypeS == "crossentropy") fSepType = new CrossEntropy();
else fSepType = new SdivSqrtSplusB();
fModelTypeS.ToLower();
if (fModelTypeS == "modlinear" ) fRuleFit.SetModelLinear();
else if (fModelTypeS == "modrule" ) fRuleFit.SetModelRules();
else fRuleFit.SetModelFull();
fPruneMethodS.ToLower();
if (fPruneMethodS == "expectederror" ) fPruneMethod = DecisionTree::kExpectedErrorPruning;
else if (fPruneMethodS == "costcomplexity" ) fPruneMethod = DecisionTree::kCostComplexityPruning;
else fPruneMethod = DecisionTree::kNoPruning;
fForestTypeS.ToLower();
if (fForestTypeS == "random" ) fUseBoost = kFALSE;
else if (fForestTypeS == "adaboost" ) fUseBoost = kTRUE;
else fUseBoost = kTRUE;
if (fUseBoost && (!fUseRuleFitJF)) fTreeEveFrac = 1.0;
if (fTreeEveFrac<=0) {
Int_t nevents = Data()->GetNTrainingEvents();
Double_t n = static_cast<Double_t>(nevents);
fTreeEveFrac = min( 0.5, (100.0 +6.0*sqrt(n))/n);
}
VerifyRange(Log(), "nTrees", fNTrees,0,100000,20);
VerifyRange(Log(), "MinImp", fMinimp,0.0,1.0,0.0);
VerifyRange(Log(), "GDTauPrec", fGDTauPrec,1e-5,5e-1);
VerifyRange(Log(), "GDTauMin", fGDTauMin,0.0,1.0);
VerifyRange(Log(), "GDTauMax", fGDTauMax,fGDTauMin,1.0);
VerifyRange(Log(), "GDPathStep", fGDPathStep,0.0,100.0,0.01);
VerifyRange(Log(), "GDErrScale", fGDErrScale,1.0,100.0,1.1);
VerifyRange(Log(), "GDPathEveFrac", fGDPathEveFrac,0.01,0.9,0.5);
VerifyRange(Log(), "GDValidEveFrac",fGDValidEveFrac,0.01,1.0-fGDPathEveFrac,1.0-fGDPathEveFrac);
VerifyRange(Log(), "fEventsMin", fMinFracNEve,0.0,1.0);
VerifyRange(Log(), "fEventsMax", fMaxFracNEve,fMinFracNEve,1.0);
fRuleFit.GetRuleEnsemblePtr()->SetLinQuantile(fLinQuantile);
fRuleFit.GetRuleFitParamsPtr()->SetGDTauRange(fGDTauMin,fGDTauMax);
fRuleFit.GetRuleFitParamsPtr()->SetGDTau(fGDTau);
fRuleFit.GetRuleFitParamsPtr()->SetGDTauPrec(fGDTauPrec);
fRuleFit.GetRuleFitParamsPtr()->SetGDTauScan(fGDTauScan);
fRuleFit.GetRuleFitParamsPtr()->SetGDPathStep(fGDPathStep);
fRuleFit.GetRuleFitParamsPtr()->SetGDNPathSteps(fGDNPathSteps);
fRuleFit.GetRuleFitParamsPtr()->SetGDErrScale(fGDErrScale);
fRuleFit.SetImportanceCut(fMinimp);
fRuleFit.SetRuleMinDist(fRuleMinDist);
if (fUseRuleFitJF) {
Log() << kINFO << "" << Endl;
Log() << kINFO << "--------------------------------------" <<Endl;
Log() << kINFO << "Friedmans RuleFit module is selected." << Endl;
Log() << kINFO << "Only the following options are used:" << Endl;
Log() << kINFO << Endl;
Log() << kINFO << gTools().Color("bold") << " Model" << gTools().Color("reset") << Endl;
Log() << kINFO << gTools().Color("bold") << " RFWorkDir" << gTools().Color("reset") << Endl;
Log() << kINFO << gTools().Color("bold") << " RFNrules" << gTools().Color("reset") << Endl;
Log() << kINFO << gTools().Color("bold") << " RFNendnodes" << gTools().Color("reset") << Endl;
Log() << kINFO << gTools().Color("bold") << " GDNPathSteps" << gTools().Color("reset") << Endl;
Log() << kINFO << gTools().Color("bold") << " GDPathStep" << gTools().Color("reset") << Endl;
Log() << kINFO << gTools().Color("bold") << " GDErrScale" << gTools().Color("reset") << Endl;
Log() << kINFO << "--------------------------------------" <<Endl;
Log() << kINFO << Endl;
}
fRuleFit.UseImportanceVisHists();
fRuleFit.SetMsgType( Log().GetMinType() );
if (HasTrainingTree()) InitEventSample();
}
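/// Initialize the monitoring ntuple holding per-rule quantities
/// (importance, support, coefficient, number of cuts/variables, purities).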
void TMVA::MethodRuleFit::InitMonitorNtuple()
{
BaseDir()->cd();
fMonitorNtuple= new TTree("MonitorNtuple_RuleFit","RuleFit variables");
fMonitorNtuple->Branch("importance",&fNTImportance,"importance/D");
fMonitorNtuple->Branch("support",&fNTSupport,"support/D");
fMonitorNtuple->Branch("coefficient",&fNTCoefficient,"coefficient/D");
fMonitorNtuple->Branch("ncuts",&fNTNcuts,"ncuts/I");
fMonitorNtuple->Branch("nvars",&fNTNvars,"nvars/I");
fMonitorNtuple->Branch("type",&fNTType,"type/I");
fMonitorNtuple->Branch("ptag",&fNTPtag,"ptag/D");
fMonitorNtuple->Branch("pss",&fNTPss,"pss/D");
fMonitorNtuple->Branch("psb",&fNTPsb,"psb/D");
fMonitorNtuple->Branch("pbs",&fNTPbs,"pbs/D");
fMonitorNtuple->Branch("pbb",&fNTPbb,"pbb/D");
fMonitorNtuple->Branch("soversb",&fNTSSB,"soversb/D");
}
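/// Default initialization of the tunable parameters.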
void TMVA::MethodRuleFit::Init()
{
SetSignalReferenceCut( 0.0 );
fLinQuantile = 0.025;
fTreeEveFrac = -1.0;
fNCuts = 20;
fSepTypeS = "GiniIndex";
fPruneMethodS = "NONE";
fPruneStrength = 3.5;
fGDTauMin = 0.0;
fGDTauMax = 1.0;
fGDTauScan = 1000;
}
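/// Copy the training events into a locally owned, shuffled vector and, if not
/// set by the user, derive the per-tree sub-sample fraction from the sample size.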
void TMVA::MethodRuleFit::InitEventSample( void )
{
if (Data()->GetNEvents()==0) Log() << kFATAL << "<Init> Data().TrainingTree() is zero pointer" << Endl;
Int_t nevents = Data()->GetNEvents();
for (Int_t ievt=0; ievt<nevents; ievt++){
const Event * ev = GetEvent(ievt);
fEventSample.push_back( new Event(*ev));
}
if (fTreeEveFrac<=0) {
Double_t n = static_cast<Double_t>(nevents);
fTreeEveFrac = min( 0.5, (100.0 +6.0*sqrt(n))/n);
}
if (fTreeEveFrac>1.0) fTreeEveFrac=1.0;
std::random_shuffle(fEventSample.begin(), fEventSample.end());
Log() << kDEBUG << "Set sub-sample fraction to " << fTreeEveFrac << Endl;
}
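/// Train the rule ensemble, using either the TMVA implementation or
/// J. Friedman's external RuleFit module.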
void TMVA::MethodRuleFit::Train( void )
{
TMVA::DecisionTreeNode::fgIsTraining=true;
InitMonitorNtuple();
this->InitEventSample();
if (fUseRuleFitJF) {
TrainJFRuleFit();
}
else {
TrainTMVARuleFit();
}
fRuleFit.GetRuleEnsemblePtr()->ClearRuleMap();
TMVA::DecisionTreeNode::fgIsTraining=false;
}
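/// Training with the TMVA implementation: fit the rule coefficients, compute
/// rule and variable importance, and fill the monitoring ntuple.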
void TMVA::MethodRuleFit::TrainTMVARuleFit( void )
{
if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with RuleFit; "
<< "please remove the optoin from the configuration string, or "
<< "use \"!Normalise\""
<< Endl;
Timer timer( 1, GetName() );
fRuleFit.Initialize( this );
Log() << kDEBUG << "Fitting rule coefficients ..." << Endl;
fRuleFit.FitCoefficients();
Log() << kDEBUG << "Computing rule and variable importance" << Endl;
fRuleFit.CalcImportance();
fRuleFit.GetRuleEnsemblePtr()->Print();
Log() << kDEBUG << "Filling rule ntuple" << Endl;
UInt_t nrules = fRuleFit.GetRuleEnsemble().GetRulesConst().size();
const Rule *rule;
for (UInt_t i=0; i<nrules; i++ ) {
rule = fRuleFit.GetRuleEnsemble().GetRulesConst(i);
fNTImportance = rule->GetRelImportance();
fNTSupport = rule->GetSupport();
fNTCoefficient = rule->GetCoefficient();
fNTType = (rule->IsSignalRule() ? 1:-1 );
fNTNvars = rule->GetRuleCut()->GetNvars();
fNTNcuts = rule->GetRuleCut()->GetNcuts();
fNTPtag = fRuleFit.GetRuleEnsemble().GetRulePTag(i);
fNTPss = fRuleFit.GetRuleEnsemble().GetRulePSS(i);
fNTPsb = fRuleFit.GetRuleEnsemble().GetRulePSB(i);
fNTPbs = fRuleFit.GetRuleEnsemble().GetRulePBS(i);
fNTPbb = fRuleFit.GetRuleEnsemble().GetRulePBB(i);
fNTSSB = rule->GetSSB();
fMonitorNtuple->Fill();
}
Log() << kDEBUG << "Training done" << Endl;
fRuleFit.MakeVisHists();
fRuleFit.MakeDebugHists();
}
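/// Training with J. Friedman's external RuleFit module (rf_go.exe), driven
/// through the RuleFitAPI interface.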
void TMVA::MethodRuleFit::TrainJFRuleFit( void )
{
fRuleFit.InitPtrs( this );
fRuleFit.SetTrainingEvents( GetTrainingEvents() );
RuleFitAPI *rfAPI = new RuleFitAPI( this, &fRuleFit, Log().GetMinType() );
rfAPI->WelcomeMessage();
Timer timer( 1, GetName() );
Log() << kINFO << "Training ..." << Endl;
rfAPI->TrainRuleFit();
Log() << kDEBUG << "reading model summary from rf_go.exe output" << Endl;
rfAPI->ReadModelSum();
Log() << kDEBUG << "calculating rule and variable importance" << Endl;
fRuleFit.CalcImportance();
fRuleFit.GetRuleEnsemblePtr()->Print();
fRuleFit.MakeVisHists();
delete rfAPI;
Log() << kDEBUG << "done training" << Endl;
}
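/// Compute the ranking of the input variables from their rule importance.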
const TMVA::Ranking* TMVA::MethodRuleFit::CreateRanking()
{
fRanking = new Ranking( GetName(), "Importance" );
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
fRanking->AddRank( Rank( GetInputLabel(ivar), fRuleFit.GetRuleEnsemble().GetVarImportance(ivar) ) );
}
return fRanking;
}
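/// Add the rule ensemble to the XML weight node.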
void TMVA::MethodRuleFit::AddWeightsXMLTo( void* parent ) const
{
fRuleFit.GetRuleEnsemble().AddXMLTo( parent );
}
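/// Read the rule ensemble from a plain-text weight stream.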
void TMVA::MethodRuleFit::ReadWeightsFromStream( istream & istr )
{
fRuleFit.GetRuleEnsemblePtr()->ReadRaw( istr );
}
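/// Read the rule ensemble from an XML weight node.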
void TMVA::MethodRuleFit::ReadWeightsFromXML( void* wghtnode )
{
fRuleFit.GetRuleEnsemblePtr()->ReadFromXML( wghtnode );
}
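/// Return the RuleFit response for the current event; no error estimate is provided.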
Double_t TMVA::MethodRuleFit::GetMvaValue( Double_t* err, Double_t* errUpper )
{
NoErrorCalc(err, errUpper);
return fRuleFit.EvalEvent( *GetEvent() );
}
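/// Write the monitoring ntuple to the method's directory in the output file.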
void TMVA::MethodRuleFit::WriteMonitoringHistosToFile( void ) const
{
BaseDir()->cd();
Log() << kINFO << "Write monitoring ntuple to file: " << BaseDir()->GetPath() << Endl;
fMonitorNtuple->Write();
}
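/// Write the method-specific part of the standalone C++ response class:
/// the offset plus the rule and linear-term contributions.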
void TMVA::MethodRuleFit::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
Int_t dp = fout.precision();
fout << " // not implemented for class: \"" << className << "\"" << std::endl;
fout << "};" << std::endl;
fout << "void " << className << "::Initialize(){}" << std::endl;
fout << "void " << className << "::Clear(){}" << std::endl;
fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const {" << std::endl;
fout << " double rval=" << setprecision(10) << fRuleFit.GetRuleEnsemble().GetOffset() << ";" << std::endl;
MakeClassRuleCuts(fout);
MakeClassLinear(fout);
fout << " return rval;" << std::endl;
fout << "}" << std::endl;
fout << std::setprecision(dp);
}
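/// Write the rule cuts as C++ code, ordered by decreasing importance.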
void TMVA::MethodRuleFit::MakeClassRuleCuts( std::ostream& fout ) const
{
Int_t dp = fout.precision();
if (!fRuleFit.GetRuleEnsemble().DoRules()) {
fout << " //" << std::endl;
fout << " // ==> MODEL CONTAINS NO RULES <==" << std::endl;
fout << " //" << std::endl;
return;
}
const RuleEnsemble *rens = &(fRuleFit.GetRuleEnsemble());
const std::vector< Rule* > *rules = &(rens->GetRulesConst());
const RuleCut *ruleCut;
std::list< std::pair<Double_t,Int_t> > sortedRules;
for (UInt_t ir=0; ir<rules->size(); ir++) {
sortedRules.push_back( std::pair<Double_t,Int_t>( (*rules)[ir]->GetImportance()/rens->GetImportanceRef(),ir ) );
}
sortedRules.sort();
fout << " //" << std::endl;
fout << " // here follows all rules ordered in importance (most important first)" << std::endl;
fout << " // at the end of each line, the relative importance of the rule is given" << std::endl;
fout << " //" << std::endl;
for ( std::list< std::pair<double,int> >::reverse_iterator itpair = sortedRules.rbegin();
itpair != sortedRules.rend(); itpair++ ) {
UInt_t ir = itpair->second;
Double_t impr = itpair->first;
ruleCut = (*rules)[ir]->GetRuleCut();
if (impr<rens->GetImportanceCut()) fout << " //" << std::endl;
fout << " if (" << std::flush;
for (UInt_t ic=0; ic<ruleCut->GetNvars(); ic++) {
Double_t sel = ruleCut->GetSelector(ic);
Double_t valmin = ruleCut->GetCutMin(ic);
Double_t valmax = ruleCut->GetCutMax(ic);
Bool_t domin = ruleCut->GetCutDoMin(ic);
Bool_t domax = ruleCut->GetCutDoMax(ic);
if (ic>0) fout << "&&" << std::flush;
if (domin) {
fout << "(" << setprecision(10) << valmin << std::flush;
fout << "<inputValues[" << sel << "])" << std::flush;
}
if (domax) {
if (domin) fout << "&&" << std::flush;
fout << "(inputValues[" << sel << "]" << std::flush;
fout << "<" << setprecision(10) << valmax << ")" <<std::flush;
}
}
fout << ") rval+=" << setprecision(10) << (*rules)[ir]->GetCoefficient() << ";" << std::flush;
fout << " // importance = " << Form("%3.3f",impr) << std::endl;
}
fout << std::setprecision(dp);
}
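/// Write the linear terms as C++ code, each input clamped to its fitted range.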
void TMVA::MethodRuleFit::MakeClassLinear( std::ostream& fout ) const
{
if (!fRuleFit.GetRuleEnsemble().DoLinear()) {
fout << " //" << std::endl;
fout << " // ==> MODEL CONTAINS NO LINEAR TERMS <==" << std::endl;
fout << " //" << std::endl;
return;
}
fout << " //" << std::endl;
fout << " // here follows all linear terms" << std::endl;
fout << " // at the end of each line, the relative importance of the term is given" << std::endl;
fout << " //" << std::endl;
const RuleEnsemble *rens = &(fRuleFit.GetRuleEnsemble());
UInt_t nlin = rens->GetNLinear();
for (UInt_t il=0; il<nlin; il++) {
if (rens->IsLinTermOK(il)) {
Double_t norm = rens->GetLinNorm(il);
Double_t imp = rens->GetLinImportance(il)/rens->GetImportanceRef();
fout << " rval+="
<< setprecision(10) << rens->GetLinCoefficients(il)*norm
<< "*std::min( double(" << setprecision(10) << rens->GetLinDP(il)
<< "), std::max( double(inputValues[" << il << "]), double(" << setprecision(10) << rens->GetLinDM(il) << ")));"
<< std::flush;
fout << " // importance = " << Form("%3.3f",imp) << std::endl;
}
}
}
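/// Print a help message describing the method and its most important tuning options.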
void TMVA::MethodRuleFit::GetHelpMessage() const
{
TString col = gConfig().WriteOptionsReference() ? "" : gTools().Color("bold");
TString colres = gConfig().WriteOptionsReference() ? "" : gTools().Color("reset");
TString brk = gConfig().WriteOptionsReference() ? "<br>" : "";
Log() << Endl;
Log() << col << "--- Short description:" << colres << Endl;
Log() << Endl;
Log() << "This method uses a collection of so called rules to create a" << Endl;
Log() << "discriminating scoring function. Each rule consists of a series" << Endl;
Log() << "of cuts in parameter space. The ensemble of rules are created" << Endl;
Log() << "from a forest of decision trees, trained using the training data." << Endl;
Log() << "Each node (apart from the root) corresponds to one rule." << Endl;
Log() << "The scoring function is then obtained by linearly combining" << Endl;
Log() << "the rules. A fitting procedure is applied to find the optimum" << Endl;
Log() << "set of coefficients. The goal is to find a model with few rules" << Endl;
Log() << "but with a strong discriminating power." << Endl;
Log() << Endl;
Log() << col << "--- Performance optimisation:" << colres << Endl;
Log() << Endl;
Log() << "There are two important considerations to make when optimising:" << Endl;
Log() << Endl;
Log() << " 1. Topology of the decision tree forest" << brk << Endl;
Log() << " 2. Fitting of the coefficients" << Endl;
Log() << Endl;
Log() << "The maximum complexity of the rules is defined by the size of" << Endl;
Log() << "the trees. Large trees will yield many complex rules and capture" << Endl;
Log() << "higher order correlations. On the other hand, small trees will" << Endl;
Log() << "lead to a smaller ensemble with simple rules, only capable of" << Endl;
Log() << "modeling simple structures." << Endl;
Log() << "Several parameters exists for controlling the complexity of the" << Endl;
Log() << "rule ensemble." << Endl;
Log() << Endl;
Log() << "The fitting procedure searches for a minimum using a gradient" << Endl;
Log() << "directed path. Apart from step size and number of steps, the" << Endl;
Log() << "evolution of the path is defined by a cut-off parameter, tau." << Endl;
Log() << "This parameter is unknown and depends on the training data." << Endl;
Log() << "A large value will tend to give large weights to a few rules." << Endl;
Log() << "Similarily, a small value will lead to a large set of rules" << Endl;
Log() << "with similar weights." << Endl;
Log() << Endl;
Log() << "A final point is the model used; rules and/or linear terms." << Endl;
Log() << "For a given training sample, the result may improve by adding" << Endl;
Log() << "linear terms. If best performance is optained using only linear" << Endl;
Log() << "terms, it is very likely that the Fisher discriminant would be" << Endl;
Log() << "a better choice. Ideally the fitting procedure should be able to" << Endl;
Log() << "make this choice by giving appropriate weights for either terms." << Endl;
Log() << Endl;
Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
Log() << Endl;
Log() << "I. TUNING OF RULE ENSEMBLE:" << Endl;
Log() << Endl;
Log() << " " << col << "ForestType " << colres
<< ": Recomended is to use the default \"AdaBoost\"." << brk << Endl;
Log() << " " << col << "nTrees " << colres
<< ": More trees leads to more rules but also slow" << Endl;
Log() << " performance. With too few trees the risk is" << Endl;
Log() << " that the rule ensemble becomes too simple." << brk << Endl;
Log() << " " << col << "fEventsMin " << colres << brk << Endl;
Log() << " " << col << "fEventsMax " << colres
<< ": With a lower min, more large trees will be generated" << Endl;
Log() << " leading to more complex rules." << Endl;
Log() << " With a higher max, more small trees will be" << Endl;
Log() << " generated leading to more simple rules." << Endl;
Log() << " By changing this range, the average complexity" << Endl;
Log() << " of the rule ensemble can be controlled." << brk << Endl;
Log() << " " << col << "RuleMinDist " << colres
<< ": By increasing the minimum distance between" << Endl;
Log() << " rules, fewer and more diverse rules will remain." << Endl;
Log() << " Initially it is a good idea to keep this small" << Endl;
Log() << " or zero and let the fitting do the selection of" << Endl;
Log() << " rules. In order to reduce the ensemble size," << Endl;
Log() << " the value can then be increased." << Endl;
Log() << Endl;
Log() << "II. TUNING OF THE FITTING:" << Endl;
Log() << Endl;
Log() << " " << col << "GDPathEveFrac " << colres
<< ": fraction of events in path evaluation" << Endl;
Log() << " Increasing this fraction will improve the path" << Endl;
Log() << " finding. However, a too high value will give few" << Endl;
Log() << " unique events available for error estimation." << Endl;
Log() << " It is recomended to usethe default = 0.5." << brk << Endl;
Log() << " " << col << "GDTau " << colres
<< ": cutoff parameter tau" << Endl;
Log() << " By default this value is set to -1.0." << Endl;
Log() << " This means that the cut off parameter is" << Endl;
Log() << " automatically estimated. In most cases" << Endl;
Log() << " this should be fine. However, you may want" << Endl;
Log() << " to fix this value if you already know it" << Endl;
Log() << " and want to reduce on training time." << brk << Endl;
Log() << " " << col << "GDTauPrec " << colres
<< ": precision of estimated tau" << Endl;
Log() << " Increase this precision to find a more" << Endl;
Log() << " optimum cut-off parameter." << brk << Endl;
Log() << " " << col << "GDNStep " << colres
<< ": number of steps in path search" << Endl;
Log() << " If the number of steps is too small, then" << Endl;
Log() << " the program will give a warning message." << Endl;
Log() << Endl;
Log() << "III. WARNING MESSAGES" << Endl;
Log() << Endl;
Log() << col << "Risk(i+1)>=Risk(i) in path" << colres << brk << Endl;
Log() << col << "Chaotic behaviour of risk evolution." << colres << Endl;
Log() << " The error rate was still decreasing at the end" << Endl;
Log() << " By construction the Risk should always decrease." << Endl;
Log() << " However, if the training sample is too small or" << Endl;
Log() << " the model is overtrained, such warnings can" << Endl;
Log() << " occur." << Endl;
Log() << " The warnings can safely be ignored if only a" << Endl;
Log() << " few (<3) occur. If more warnings are generated," << Endl;
Log() << " the fitting fails." << Endl;
Log() << " A remedy may be to increase the value" << brk << Endl;
Log() << " "
<< col << "GDValidEveFrac" << colres
<< " to 1.0 (or a larger value)." << brk << Endl;
Log() << " In addition, if "
<< col << "GDPathEveFrac" << colres
<< " is too high" << Endl;
Log() << " the same warnings may occur since the events" << Endl;
Log() << " used for error estimation are also used for" << Endl;
Log() << " path estimation." << Endl;
Log() << " Another possibility is to modify the model - " << Endl;
Log() << " See above on tuning the rule ensemble." << Endl;
Log() << Endl;
Log() << col << "The error rate was still decreasing at the end of the path"
<< colres << Endl;
Log() << " Too few steps in path! Increase "
<< col << "GDNSteps" << colres << "." << Endl;
Log() << Endl;
Log() << col << "Reached minimum early in the search" << colres << Endl;
Log() << " Minimum was found early in the fitting. This" << Endl;
Log() << " may indicate that the used step size "
<< col << "GDStep" << colres << "." << Endl;
Log() << " was too large. Reduce it and rerun." << Endl;
Log() << " If the results still are not OK, modify the" << Endl;
Log() << " model either by modifying the rule ensemble" << Endl;
Log() << " or add/remove linear terms" << Endl;
}