// body of almost_equal_float(float x, float y, int ulp):
return std::abs(x - y) < std::numeric_limits<float>::epsilon() * std::abs(x + y) * ulp
    || std::abs(x - y) < std::numeric_limits<float>::min();

// body of almost_equal_double(double x, double y, int ulp):
return std::abs(x - y) < std::numeric_limits<double>::epsilon() * std::abs(x + y) * ulp
    || std::abs(x - y) < std::numeric_limits<double>::min();
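For reference, a minimal self-contained sketch of how a relative-epsilon comparison of this kind behaves; the helper below is a hypothetical standalone copy of the double version above, not the one defined in this file:

#include <cmath>
#include <iostream>
#include <limits>

// Two values are "almost equal" if their difference is within a few ULPs of
// their combined magnitude, or if the difference is subnormal.
static bool almostEqual(double x, double y, int ulp = 4)
{
   return std::abs(x - y) < std::numeric_limits<double>::epsilon() * std::abs(x + y) * ulp
       || std::abs(x - y) < std::numeric_limits<double>::min();
}

int main()
{
   const double a = 0.1 + 0.2;                // binary rounding gives 0.30000000000000004
   std::cout << (a == 0.3) << '\n';           // 0: exact equality fails
   std::cout << almostEqual(a, 0.3) << '\n';  // 1: relative comparison succeeds
   return 0;
}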
// default constructor: initializer-list excerpt
fMinLinCorrForFisher (1),
fUseExclusiveVars (kTRUE),
fPruneMethod (kNoPruning),
fNNodesBeforePruning(0),
fNodePurityLimit(0.5),
fAnalysisType (Types::kClassification),

// parameterised constructor: initializer-list excerpt
fMinLinCorrForFisher (1),
fUseExclusiveVars (kTRUE),
fPruneMethod (kNoPruning),
fNNodesBeforePruning(0),
fAnalysisType (Types::kClassification),
Log() << kWARNING << " You had chosen the training mode using optimal cuts, not\n"
      << " based on a grid of " << fNCuts << " by setting the option NCuts < 0\n"
      << " as this doesn't exist yet, I set it to " << fNCuts << " and use the grid"
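The warning fires when a user sets NCuts < 0 to request cut optimization without a grid, which is not implemented, so training falls back to an fNCuts-point grid scan. As a hedged sketch of where this parameter enters in the usual Factory/DataLoader workflow (the option string values are illustrative):

#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/Types.h"

void bookBdt(TMVA::Factory& factory, TMVA::DataLoader& loader)
{
   // "nCuts=20" requests a 20-point grid for the cut scan; a negative value
   // would trigger the warning above and be reset to the grid default.
   factory.BookMethod(&loader, TMVA::Types::kBDT, "BDT",
                      "NTrees=400:MaxDepth=3:nCuts=20");
}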
// copy constructor: initializer-list excerpt
fUseFisherCuts (d.fUseFisherCuts),
fMinLinCorrForFisher (d.fMinLinCorrForFisher),
fUseExclusiveVars (d.fUseExclusiveVars),
fSepType (d.fSepType),
fRegType (d.fRegType),
fMinSize (d.fMinSize),
fMinNodeSize (d.fMinNodeSize),
fMinSepGain (d.fMinSepGain),
fUseSearchTree (d.fUseSearchTree),
fPruneStrength (d.fPruneStrength),
fPruneMethod (d.fPruneMethod),
fNodePurityLimit(d.fNodePurityLimit),
fRandomisedTree (d.fRandomisedTree),
fUseNvars (d.fUseNvars),
fUsePoissonNvars(d.fUsePoissonNvars),
fMaxDepth (d.fMaxDepth),
fSigClass (d.fSigClass),
fAnalysisType (d.fAnalysisType),
fDataSetInfo (d.fDataSetInfo)
// destructor: release owned resources
if (fMyTrandom) delete fMyTrandom;
if (fRegType)   delete fRegType;
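A side note on these guards: deleting a null pointer is a no-op in C++, so the checks are defensive documentation rather than a requirement.

// if (fMyTrandom) delete fMyTrandom;   // equivalent to the unguarded form:
// delete fMyTrandom;                   // delete on nullptr does nothing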
// SetParentTreeInNodes: excerpt
Log() << kFATAL << "SetParentTreeNodes: started with undefined ROOT node" << Endl;

if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) != NULL)) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
}
else if ((this->GetLeftDaughter(n) != NULL) && (this->GetRightDaughter(n) == NULL)) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
}

if (this->GetLeftDaughter(n) != NULL) {
   this->SetParentTreeInNodes( this->GetLeftDaughter(n) );
}
if (this->GetRightDaughter(n) != NULL) {
   this->SetParentTreeInNodes( this->GetRightDaughter(n) );
}
n->SetParentTree(this);
if (n->GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(n->GetDepth());
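SetParentTreeInNodes thus descends recursively, stamps each node with its owning tree, and keeps the total tree depth up to date. A minimal sketch of the same recursion on a generic binary node (the struct and function names are illustrative, not TMVA's):

#include <algorithm>

struct SimpleNode {
   SimpleNode* left  = nullptr;
   SimpleNode* right = nullptr;
   unsigned    depth = 0;
};

// Visit the tree recursively and return the maximum depth found, mirroring
// how the code above updates the total tree depth while descending.
unsigned maxDepth(const SimpleNode* n)
{
   if (!n) return 0;
   return std::max({n->depth, maxDepth(n->left), maxDepth(n->right)});
}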
std::string type("");

// BuildNodeInfo constructors: per-variable range vectors
xmin = std::vector<Float_t>(nvars);
xmax = std::vector<Float_t>(nvars);
xmin = std::vector<Float_t>(nvars);
xmax = std::vector<Float_t>(nvars);
if (nvars != other.nvars)
   std::cout << "!!! ERROR BuildNodeInfo1+BuildNodeInfo2 failure. Nvars1 != Nvars2." << std::endl;
ret.target2 = target2 + other.target2;
for (Int_t i = 0; i < nvars; i++)
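BuildNodeInfo's operator+ exists so that partial statistics accumulated over disjoint event ranges (for example by parallel workers) can be merged, hence the consistency check on nvars. A sketch of the reduction idiom with a simplified stand-in struct (Info is illustrative):

#include <numeric>
#include <vector>

struct Info {
   double sum = 0, sum2 = 0;   // running sums of targets and squared targets
   Info operator+(const Info& o) const { return {sum + o.sum, sum2 + o.sum2}; }
};

// Merge the partial results of several workers into one total.
Info merge(const std::vector<Info>& parts)
{
   return std::accumulate(parts.begin(), parts.end(), Info{});
}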
// BuildTree: excerpt
this->GetRoot()->SetPos('s');
this->GetRoot()->SetDepth(0);
this->GetRoot()->SetParentTree(this);

Log() << kDEBUG << "\tThe minimal node size MinNodeSize=" << fMinNodeSize
      << " fMinNodeSize=" << fMinNodeSize
      << "% is translated to an actual number of events = " << fMinSize
      << " for the training sample size of " << eventSample.size() << Endl;
Log() << kDEBUG << "\tNote: This number will be taken as absolute minimum in the node, " << Endl;
Log() << kDEBUG << " \tin terms of 'weighted events' and unweighted ones !! " << Endl;

if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
fVariableImportance.resize(fNvars);
else Log() << kFATAL << ":<BuildTree> eventsample Size == 0 " << Endl;
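The debug message reports how the relative MinNodeSize (a percentage of the training sample) is translated into the absolute per-node minimum fMinSize. The arithmetic is simple; a sketch under the assumption that the value is truncated to an integer count:

#include <cstddef>

// MinNodeSize is given in percent of the training sample, so the absolute
// per-node minimum is fMinSize = fMinNodeSize/100 * Nevents.
std::size_t toAbsoluteMinSize(double minNodeSizePercent, std::size_t nEvents)
{
   return static_cast<std::size_t>(minNodeSizePercent / 100.0 * nEvents);
}
// Example: MinNodeSize=5 (%) on 10000 training events gives fMinSize = 500.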
if (evt->GetClass() == fSigClass) {
if ( DoRegression() ) {

Log() << kWARNING << " One of the Decision Tree nodes has negative total number of signal or background events. "
      << "(Nsig=" << nodeInfo.s << " Nbkg=" << nodeInfo.b
      << " Probably you use a Monte Carlo with negative weights. That should in principle "
      << "be fine as long as on average you end up with something positive. For this you have to make sure that the "
      << "minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize=" << fMinNodeSize
      << "% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
      << "to allow for reasonable averaging!!!" << Endl
      << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
      << "with negative weight in the training." << Endl;

Log() << kDEBUG << "Event " << i << " has (original) weight: "
      << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
Log() << kDEBUG << " that gives in total: " << nBkg << Endl;
if (node == this->GetRoot()) {
if (DoRegression()) {

Log() << kERROR << "<TrainNode> all events went to the same branch" << Endl
      << "--- Hence new node == old node ... check" << Endl
      << "\n when cutting on variable " << node->GetSelector()
      << kFATAL << "--- this should never happen, please write a bug report to Helge.Voss@cern.ch" << Endl;

if (DoRegression()) {
// second BuildTree variant: excerpt (mirrors the code above)
this->GetRoot()->SetPos('s');
this->GetRoot()->SetDepth(0);
this->GetRoot()->SetParentTree(this);

Log() << kDEBUG << "\tThe minimal node size MinNodeSize=" << fMinNodeSize
      << " fMinNodeSize=" << fMinNodeSize
      << "% is translated to an actual number of events = " << fMinSize
      << " for the training sample size of " << eventSample.size() << Endl;
Log() << kDEBUG << "\tNote: This number will be taken as absolute minimum in the node, " << Endl;
Log() << kDEBUG << " \tin terms of 'weighted events' and unweighted ones !! " << Endl;

if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
fVariableImportance.resize(fNvars);
else Log() << kFATAL << ":<BuildTree> eventsample Size == 0 " << Endl;
if (evt->GetClass() == fSigClass) {
if ( DoRegression() ) {

Log() << kWARNING << " One of the Decision Tree nodes has negative total number of signal or background events. "
      << "(Nsig=" << s << " Nbkg=" << b
      << " Probably you use a Monte Carlo with negative weights. That should in principle "
      << "be fine as long as on average you end up with something positive. For this you have to make sure that the "
      << "minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize=" << fMinNodeSize
      << "% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
      << "to allow for reasonable averaging!!!" << Endl
      << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
      << "with negative weight in the training." << Endl;
if (node == this->GetRoot()) {

&& ( ( s!=0 && b!=0 && !DoRegression()) || ( (s+b)!=0 && DoRegression()) ) ) {

if (DoRegression()) {

Log() << kERROR << "<TrainNode> all events went to the same branch" << Endl
      << "--- Hence new node == old node ... check" << Endl
      << "\n when cutting on variable " << node->GetSelector()
      << kFATAL << "--- this should never happen, please write a bug report to Helge.Voss@cern.ch" << Endl;
if (DoRegression()) {

// FillEvent: excerpt
node = this->GetRoot();
if (event.GetClass() == fSigClass) {
this->FillEvent(event, node->GetRight());
this->FillEvent(event, node->GetLeft());

// ClearTree
if (this->GetRoot() != NULL) this->GetRoot()->ClearNodeAndAllDaughters();

// CleanTree: excerpt
node = this->GetRoot();
if (l->GetNodeType() * r->GetNodeType() > 0) {
   this->PruneNode(node);
}
return this->CountNodes();
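CleanTree removes a split whose two leaves carry the same classification: as documented below for SetNodeType, the node type is +1 for a signal leaf, -1 for a background leaf, and 0 for an intermediate node, so a positive product means both daughters agree and the split is redundant. A minimal sketch of that test:

// +1 signal leaf, -1 background leaf: a positive product means both daughters
// classify identically, so the parent's split can be pruned back to one leaf.
inline bool splitIsRedundant(int leftType, int rightType)
{
   return leftType * rightType > 0;
}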
// PruneTree: excerpt
if (fPruneMethod == kNoPruning) return 0.0;
if (fPruneMethod == kExpectedErrorPruning)
else if (fPruneMethod == kCostComplexityPruning)
Log() << kFATAL << "Selected pruning method not yet implemented "

if (!tool) return 0.0;
tool->SetPruneStrength(GetPruneStrength());
if (tool->IsAutomatic()) {
   Log() << kFATAL << "Cannot automate the pruning algorithm without an "
         << "independent validation sample!" << Endl;
   Log() << kFATAL << "Cannot automate the pruning algorithm with "
         << "independent validation sample of ZERO events!" << Endl;

Log() << kFATAL << "Error pruning tree! Check prune.log for more information."

for (UInt_t i = 0; i < info->PruneSequence.size(); ++i) {
   PruneNode(info->PruneSequence[i]);
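PruneTree dispatches on fPruneMethod (expected-error vs. cost-complexity pruning) and then applies the computed prune sequence node by node; automatic strength tuning requires a validation sample, hence the kFATAL messages above. A hedged sketch of selecting these options at booking time (option spellings as commonly documented for TMVA BDTs; verify against your ROOT version):

#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/Types.h"

void bookPrunedBdt(TMVA::Factory& factory, TMVA::DataLoader& loader)
{
   // PruneMethod selects the algorithm; PruneStrength=-1 requests automatic
   // strength tuning on an independent validation sample.
   factory.BookMethod(&loader, TMVA::Types::kBDT, "BDT",
                      "NTrees=400:PruneMethod=CostComplexity:PruneStrength=-1");
}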
GetRoot()->ResetValidationData();

// TestPrunedTreeQuality: excerpt
n = this->GetRoot();
Log() << kFATAL << "TestPrunedTreeQuality: started with undefined ROOT node" << Endl;

if (n->GetLeft() != NULL && n->GetRight() != NULL && !n->IsTerminal()) {
   return (TestPrunedTreeQuality(n->GetLeft(),  mode) +
           TestPrunedTreeQuality(n->GetRight(), mode));

if (DoRegression()) {
   return n->GetSumTarget2() - 2*n->GetSumTarget()*n->GetResponse()
        + sumw*n->GetResponse()*n->GetResponse();

if (n->GetPurity() > this->GetNodePurityLimit())
   return n->GetNBValidation();
return n->GetNSValidation();

else if (mode == 1) {
   return (n->GetPurity() * n->GetNBValidation()
        + (1.0 - n->GetPurity()) * n->GetNSValidation());

throw std::string("Unknown ValidationQualityMode");
if (current == NULL) {
   Log() << kFATAL << "CheckEventWithPrunedTree: started with undefined ROOT node" << Endl;
}
while (current != NULL) {
   if (e->GetClass() == fSigClass)
if (e->GetNTargets() > 0) {
// CountLeafNodes: excerpt
n = this->GetRoot();
Log() << kFATAL << "CountLeafNodes: started with undefined ROOT node" << Endl;

if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) == NULL)) {
if (this->GetLeftDaughter(n) != NULL) {
   countLeafs += this->CountLeafNodes( this->GetLeftDaughter(n) );
}
if (this->GetRightDaughter(n) != NULL) {
   countLeafs += this->CountLeafNodes( this->GetRightDaughter(n) );
}
// DescendTree: excerpt
n = this->GetRoot();
Log() << kFATAL << "DescendTree: started with undefined ROOT node" << Endl;

if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) == NULL)) {
}
else if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) != NULL)) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
}
else if ((this->GetLeftDaughter(n) != NULL) && (this->GetRightDaughter(n) == NULL)) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
}

if (this->GetLeftDaughter(n) != NULL) {
   this->DescendTree( this->GetLeftDaughter(n) );
}
if (this->GetRightDaughter(n) != NULL) {
   this->DescendTree( this->GetRightDaughter(n) );
}
// PruneNode: excerpt
this->DeleteNode(l);
this->DeleteNode(r);

// PruneNodeInPlace: excerpt
if (node == NULL) return;
node->SetAlpha( std::numeric_limits<double>::infinity() );

// GetNode: excerpt
Node* current = this->GetRoot();
if (tmp & sequence) current = this->GetRightDaughter(current);
else current = this->GetLeftDaughter(current);
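GetNode decodes a node's position from a bit sequence: at each level one bit decides right (set) or left (clear). A self-contained sketch of the same walk on a simple node type (the struct is illustrative, and the assumption that the tested bit advances one position per level follows the tmp/sequence test above):

struct BNode {
   BNode* left  = nullptr;
   BNode* right = nullptr;
};

// Follow 'depth' bits of 'sequence' from the root: bit set -> right daughter,
// bit clear -> left daughter.
BNode* walk(BNode* root, unsigned long sequence, unsigned depth)
{
   BNode*        current = root;
   unsigned long tmp     = 1;
   for (unsigned i = 0; i < depth && current; ++i, tmp <<= 1)
      current = (tmp & sequence) ? current->right : current->left;
   return current;
}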
// GetRandomisedVariables: excerpt
Double_t bla = fMyTrandom->Rndm()*fNvars;
if (nSelectedVars != useNvars) {
   std::cout << "Bug in TrainNode - GetRandisedVariables()... sorry" << std::endl;
   std::exit(1);
}
// TrainNodeInfo constructor: per-variable cut-scan histograms
nSelS = std::vector< std::vector<Double_t> >(cNvars);
nSelB = std::vector< std::vector<Double_t> >(cNvars);
nSelS_unWeighted = std::vector< std::vector<Double_t> >(cNvars);
nSelB_unWeighted = std::vector< std::vector<Double_t> >(cNvars);
target = std::vector< std::vector<Double_t> >(cNvars);
target2 = std::vector< std::vector<Double_t> >(cNvars);

nSelS[ivar] = std::vector<Double_t>(nBins[ivar], 0);
nSelB[ivar] = std::vector<Double_t>(nBins[ivar], 0);
nSelS_unWeighted[ivar] = std::vector<Double_t>(nBins[ivar], 0);
nSelB_unWeighted[ivar] = std::vector<Double_t>(nBins[ivar], 0);
target2[ivar] = std::vector<Double_t>(nBins[ivar], 0);
std::vector< std::vector<Double_t> > nSelS;
std::vector< std::vector<Double_t> > nSelB;

if (cNvars != other.cNvars)
   std::cout << "!!! ERROR TrainNodeInfo1+TrainNodeInfo2 failure. cNvars1 != cNvars2." << std::endl;
ret.nTotS_unWeighted = nTotS_unWeighted + other.nTotS_unWeighted;
ret.nTotB_unWeighted = nTotB_unWeighted + other.nTotB_unWeighted;
// TrainNodeFast: excerpt
if (fRandomisedTree) {
if (fUseFisherCuts) {
Log() << kWARNING << " in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" << Endl;
if (fUseFisherCuts && fisherOK) cNvars++;

nBins[ivar] = fNCuts+1;
if (ivar < fNvars) {
   if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() == 'I') {

if (ivar < fNvars) {
if (DoRegression()) {
if (DoRegression()) {
if (DoRegression()) {

Log() << kFATAL << "Helge, you have a bug ....nodeInfo.nSelS_unw..+nodeInfo.nSelB_unw..= "
      << " while eventsample size = " << eventSample.size()
Log() << kFATAL << "Helge, you have another bug ....nodeInfo.nSelS+nodeInfo.nSelB= "
      << " while total number of events = " << totalSum

if ( ((sl+bl) >= fMinSize && (sr+br) >= fMinSize)
if (DoRegression()) {
if (DoRegression()) {
for (UInt_t i=0; i<cNvars; i++) {
// second TrainNodeFast variant: excerpt (mirrors the code above)
Int_t nTotS_unWeighted, nTotB_unWeighted;
if (fRandomisedTree) {
if (fUseFisherCuts) {
Log() << kWARNING << " in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" << Endl;
if (fUseFisherCuts && fisherOK) cNvars++;

nBins[ivar] = fNCuts+1;
if (ivar < fNvars) {
   if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() == 'I') {

if (ivar < fNvars) {
nTotS_unWeighted=0; nTotB_unWeighted=0;
nTotS_unWeighted++; }
if (DoRegression()) {
if (DoRegression()) {

Log() << kFATAL << "Helge, you have a bug ....nSelS_unw..+nSelB_unw..= "
      << nSelS_unWeighted[ivar][nBins[ivar]-1] + nSelB_unWeighted[ivar][nBins[ivar]-1]
      << " while eventsample size = " << eventSample.size()
Log() << kFATAL << "Helge, you have another bug ....nSelS+nSelB= "
      << " while total number of events = " << totalSum

if ( ((sl+bl) >= fMinSize && (sr+br) >= fMinSize)
if (DoRegression()) {
if (DoRegression()) {

for (UInt_t i=0; i<cNvars; i++) {
   delete [] nSelS_unWeighted[i];
   delete [] nSelB_unWeighted[i];
   delete [] target2[i];
}
delete [] nSelS_unWeighted;
delete [] nSelB_unWeighted;
// GetFisherCoefficients: class-wise covariance accumulation
if (ev->GetClass() == fSigClass)
   sum2Sig[k] += ( (xval[x] - (*meanMatx)(x, 0))*(xval[y] - (*meanMatx)(y, 0)) )*weight;
else
   sum2Bgd[k] += ( (xval[x] - (*meanMatx)(x, 1))*(xval[y] - (*meanMatx)(y, 1)) )*weight;
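The Fisher-cut machinery accumulates, per class, the weighted sums of (x_i - mean_i)(x_j - mean_j) that form the within-class covariance matrices (column 0 of meanMatx holds the signal means, column 1 the background means). A reduced sketch for one class and one pair of variables (the normalisation by the weight sum is an assumption, not copied from this file):

#include <cstddef>
#include <vector>

// Weighted covariance of variables a and b around precomputed means,
// matching the sum2Sig/sum2Bgd accumulation above for a single class.
double weightedCov(const std::vector<double>& a, const std::vector<double>& b,
                   const std::vector<double>& w, double meanA, double meanB)
{
   double s = 0, sw = 0;
   for (std::size_t i = 0; i < a.size(); ++i) {
      s  += w[i] * (a[i] - meanA) * (b[i] - meanB);
      sw += w[i];
   }
   return sw > 0 ? s / sw : 0.0;
}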
Log() << kWARNING << "FisherCoeff matrix is almost singular with determinant="
      << " did you use the variables that are linear combinations or highly correlated?"
Log() << kFATAL << "FisherCoeff matrix is singular with determinant="
      << " did you use the variables that are linear combinations?"
// TrainNodeFull: excerpt
Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;

std::vector<Double_t> lCutValue( fNvars, 0.0 );
std::vector<Double_t> lSepGain( fNvars, -1.0e6 );
std::vector<Char_t>   lCutType( fNvars );

if ((*it)->GetClass() == fSigClass) {
   nTotS += (*it)->GetWeight();
nTotB += (*it)->GetWeight();

if (fRandomisedTree) {
   if (fUseNvars == 0) {
Double_t bla = fMyTrandom->Rndm()*fNvars;

for ( ; it != it_end; ++it ) {
   if ((**it)->GetClass() == fSigClass)
if ( *(*it) == NULL ) {
   Log() << kFATAL << "In TrainNodeFull(): have a null event! Where index="

Double_t nSelS = it->GetCumulativeWeight(true);
Double_t nSelB = it->GetCumulativeWeight(false);
// CheckEvent / SamplePurity / GetVariableImportance: excerpts
Log() << kFATAL << "CheckEvent: started with undefined ROOT node" << Endl;
Log() << kFATAL << "DT::CheckEvent: inconsistent tree structure" << Endl;
if (DoRegression()) {
Log() << kFATAL << "<SamplePurity> sumtot != sumsig+sumbkg"

for (UInt_t i = 0; i < fNvars; i++) {
   sum += fVariableImportance[i];
}
for (UInt_t i = 0; i < fNvars; i++) {
   if (sum > std::numeric_limits<double>::epsilon())

Log() << kFATAL << "<GetVariableImportance>" << Endl
      << "--- ivar = " << ivar << " is out of range " << Endl;
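GetVariableImportance sums the raw per-variable importances and, when the total exceeds machine epsilon, divides each entry by it, so the returned values add up to 1 (as the member documentation below also states). A compact sketch of that normalisation:

#include <limits>
#include <numeric>
#include <vector>

// Normalise raw importances so they sum to 1; leave them untouched when the
// total is numerically zero (the epsilon guard seen above).
void normaliseImportance(std::vector<double>& imp)
{
   const double sum = std::accumulate(imp.begin(), imp.end(), 0.0);
   if (sum > std::numeric_limits<double>::epsilon())
      for (double& v : imp) v /= sum;
}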
bool almost_equal_double(double x, double y, int ulp=4)
bool almost_equal_float(float x, float y, int ulp=4)
Int_t (int): signed integer, 4 bytes
Char_t (char): character, 1 byte
ULong_t (unsigned long): unsigned long integer, 4 bytes; size depends on architecture
UInt_t (unsigned int): unsigned integer, 4 bytes
Float_t (float): floating point, 4 bytes
Double_t (double): floating point, 8 bytes
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
TMatrixT< Double_t > TMatrixD
const_iterator begin() const
const_iterator end() const
static void SetVarIndex(Int_t iVar)
Base class for BinarySearch and Decision Trees.
UInt_t fNNodes
total number of nodes in the tree (counted)
static Config & Instance()
static function: returns TMVA instance
Class that contains all the data information.
void SetNEvents_unweighted(Float_t nev)
set the number of unweighted events that entered the node (during training), if traininfo defined
void SetNodeType(Int_t t)
set node type: 1 signal node, -1 bkg leaf, 0 intermediate node
void SetSeparationGain(Float_t sep)
set the separation, or information gained BY this node's selection, if traininfo defined
void SetNBkgEvents(Float_t b)
set the sum of the backgr weights in the node, if traininfo defined
void SetCutType(Bool_t t)
set true: if event variable > cutValue ==> signal , false otherwise
Double_t GetNSValidation() const
return number of signal events from the pruning validation sample, or -1 if traininfo undefined
void IncrementNEvents_unweighted()
increment the number of events that entered the node (during training), if traininfo defined
void SetFisherCoeff(Int_t ivar, Double_t coeff)
set fisher coefficients
void SetNSigEvents_unboosted(Float_t s)
set the sum of the unboosted signal events in the node, if traininfo defined
void SetAlphaMinSubtree(Double_t g)
set the minimum alpha in the tree rooted at this node, if traininfo defined
void IncrementNBkgEvents(Float_t b)
increment the sum of the backgr weights in the node, if traininfo defined
void SetNEvents_unboosted(Float_t nev)
set the number of unboosted events that entered the node (during training), if traininfo defined
Float_t GetNSigEvents(void) const
return the sum of the signal weights in the node, or -1 if traininfo undefined
DecisionTreeNode * GetLeft() const override
void SetTerminal(Bool_t s=kTRUE)
void SetLeft(Node *l) override
DecisionTreeNode * GetRight() const override
void SetResponse(Float_t r)
set the response of the node (for regression)
void SetSampleMax(UInt_t ivar, Float_t xmax)
set the maximum of variable ivar from the training sample that pass/end up in this node,...
void SetNBValidation(Double_t b)
set number of background events from the pruning validation sample, if traininfo defined
void IncrementNEvents(Float_t nev)
void SetPurity(void)
set the S/(S+B) (purity) for the node. REM: even if nodes with purity 0.01 are very PURE background...
void SetSubTreeR(Double_t r)
set the resubstitution estimate, R(T_t), of the tree rooted at this node, if traininfo defined
void AddToSumTarget2(Float_t t2)
add to sum target 2, if traininfo defined
DecisionTreeNode * GetParent() const override
Double_t GetNodeR() const
return the node resubstitution estimate, R(t), for Cost Complexity pruning, or -1 if traininfo undefi...
void SetNFisherCoeff(Int_t nvars)
Short_t GetSelector() const
return index of variable used for discrimination at this node
void SetNSigEvents(Float_t s)
set the sum of the signal weights in the node, if traininfo defined
Float_t GetResponse(void) const
return the response of the node (for regression)
Float_t GetCutValue(void) const
return the cut value applied at this node
Bool_t GoesRight(const Event &) const override
test event if it descends the tree at this node to the right
Int_t GetNodeType(void) const
return node type: 1 signal node, -1 bkg leaf, 0 intermediate node
void IncrementNBkgEvents_unweighted()
increment the sum of the backgr weights in the node, if traininfo defined
void SetNSigEvents_unweighted(Float_t s)
set the sum of the unweighted signal events in the node, if traininfo defined
void SetRight(Node *r) override
Double_t GetNBValidation() const
return number of background events from the pruning validation sample, or -1 if traininfo undefined
void SetAlpha(Double_t alpha)
set the critical point alpha, if traininfo defined
void SetSeparationIndex(Float_t sep)
set the chosen index, measure of "purity" (separation between S and B) AT this node,...
void SetRMS(Float_t r)
set the RMS of the response of the node (for regression)
void IncrementNSigEvents_unweighted()
increment the sum of the signal weights in the node, if traininfo defined
void SetNBkgEvents_unboosted(Float_t b)
set the sum of the unboosted backgr events in the node, if traininfo defined
Float_t GetPurity(void) const
return S/(S+B) (purity) at this node (from training)
void IncrementNSigEvents(Float_t s)
increment the sum of the signal weights in the node, if traininfo defined
Float_t GetSampleMax(UInt_t ivar) const
return the maximum of variable ivar from the training sample that pass/end up in this node,...
void SetCutValue(Float_t c)
set the cut value applied at this node
Float_t GetNBkgEvents(void) const
return the sum of the backgr weights in the node, or -1 if traininfo undefined
Float_t GetSampleMin(UInt_t ivar) const
return the minimum of variable ivar from the training sample that pass/end up in this node,...
void SetSampleMin(UInt_t ivar, Float_t xmin)
set the minimum of variable ivar from the training sample that pass/end up in this node,...
void SetSelector(Short_t i)
set index of variable used for discrimination at this node
void SetNBkgEvents_unweighted(Float_t b)
set the sum of the unweighted backgr events in the node, if traininfo defined
void SetNSValidation(Double_t s)
set number of signal events from the pruning validation sample, if traininfo defined
void AddToSumTarget(Float_t t)
add to sum target, if traininfo defined
void SetNTerminal(Int_t n)
set number of terminal nodes in the subtree rooted here, if traininfo defined
void SetNEvents(Float_t nev)
set the number of events that entered the node (during training), if traininfo defined
Implementation of a Decision Tree.
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=nullptr)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
void FillTree(const EventList &eventSample)
fill the existing decision tree structure by filling events in from the top node and see where the...
void PruneNode(TMVA::DecisionTreeNode *node)
prune away the subtree below the node
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
Double_t TrainNodeFull(const EventConstList &eventSample, DecisionTreeNode *node)
train a node by finding the single optimal cut for a single variable that best separates signal and b...
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in... (used in gradient boosting)
void GetRandomisedVariables(Bool_t *useVariable, UInt_t *variableMap, UInt_t &nVars)
void SetParentTreeInNodes(Node *n=nullptr)
descend a tree to find all its leaf nodes, fill max depth reached in the tree at the same time.
void DescendTree(Node *n=nullptr)
descend a tree to find all its leaf nodes
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657)
re-create a new tree (decision tree or search tree) from XML
std::vector< const TMVA::Event * > EventConstList
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
static const Int_t fgRandomSeed
Int_t fNCuts
number of grid point in variable cut scans
UInt_t CleanTree(DecisionTreeNode *node=nullptr)
remove those last splits that result in two leaf nodes that are both of the type (i....
virtual ~DecisionTree(void)
destructor
Types::EAnalysisType fAnalysisType
kClassification(=0=false) or kRegression(=1=true)
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
void CheckEventWithPrunedTree(const TMVA::Event *) const
pass a single validation event through a pruned decision tree on the way down the tree,...
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants which allows testing the pruned t...
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=nullptr, Int_t mode=0) const
return the misclassification rate of a pruned tree; a "pruned tree" may have set the variable "IsTermi...
Double_t PruneTree(const EventConstList *validationSample=nullptr)
prune (get rid of internal nodes) the Decision tree to avoid overtraining several different pruning m...
void FillEvent(const TMVA::Event &event, TMVA::DecisionTreeNode *node)
fill the existing decision tree structure by filling events in from the top node and see where the...
void ClearTree()
clear the tree nodes (their S/N, Nevents etc), just keep the structure of the tree
Double_t SamplePurity(EventList eventSample)
calculates the purity S/(S+B) of a given event sample
Node * GetNode(ULong_t sequence, UInt_t depth)
retrieve node from the tree.
std::vector< Double_t > GetFisherCoefficients(const EventConstList &eventSample, UInt_t nFisherVars, UInt_t *mapVarInFisher)
calculate the fisher coefficients for the event sample and the variables used
UInt_t CountLeafNodes(TMVA::Node *n=nullptr)
return the number of terminal nodes in the sub-tree below Node n
Double_t TrainNodeFast(const EventConstList &eventSample, DecisionTreeNode *node)
Decide how to split a node using one of the variables that gives the best separation of signal/backgr...
RegressionVariance * fRegType
the separation criteria used in Regression
DecisionTree(void)
default constructor using the GiniIndex as separation criterion, no restrictions on minimum number of ...
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
Double_t GetWeight() const
return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Node for the BinarySearch or Decision Trees.
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Singleton class for Global types used by TMVA.
Random number generator class based on M.
TSeq< unsigned int > TSeqU
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
BuildNodeInfo(Int_t fNvars, std::vector< Float_t > &inxmin, std::vector< Float_t > &inxmax)
std::vector< Float_t > xmin
BuildNodeInfo operator+(const BuildNodeInfo &other)
std::vector< Float_t > xmax
BuildNodeInfo(Int_t fNvars, const TMVA::Event *evt)
std::vector< std::vector< Double_t > > target2
std::vector< std::vector< Double_t > > nSelB_unWeighted
std::vector< std::vector< Double_t > > nSelB
std::vector< std::vector< Double_t > > target
std::vector< std::vector< Double_t > > nSelS_unWeighted
TrainNodeInfo operator+(const TrainNodeInfo &other)
std::vector< std::vector< Double_t > > nSelS
TrainNodeInfo(Int_t cNvars_, UInt_t *nBins_)
static uint64_t sum(uint64_t i)