97 return std::abs(
x-
y) < std::numeric_limits<float>::epsilon() * std::abs(
x+
y) * ulp
99 || std::abs(
x-
y) < std::numeric_limits<float>::min();
105 return std::abs(
x-
y) < std::numeric_limits<double>::epsilon() * std::abs(
x+
y) * ulp
107 || std::abs(
x-
y) < std::numeric_limits<double>::min();
179 if (sepType == NULL) {
186 Log() << kWARNING <<
" You had chosen the training mode using optimal cuts, not\n"
187 <<
" based on a grid of " <<
fNCuts <<
" by setting the option NCuts < 0\n"
188 <<
" as this doesn't exist yet, I set it to " <<
fNCuts <<
" and use the grid"
253 Log() << kFATAL <<
"SetParentTreeNodes: started with undefined ROOT node" <<
Endl;
259 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
262 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
273 n->SetParentTree(
this);
282 std::string
type(
"");
286 dt->
ReadXML( node, tmva_Version_Code );
306 for (
Int_t ivar=0; ivar<fNvars; ivar++) {
320 for (
Int_t ivar=0; ivar<fNvars; ivar++) {
321 xmin[ivar]=inxmin[ivar];
322 xmax[ivar]=inxmax[ivar];
346 std::cout <<
"!!! ERROR BuildNodeInfo1+BuildNodeInfo2 failure. Nvars1 != Nvars2." << std::endl;
388 this->
GetRoot()->SetParentTree(
this);
391 Log() << kDEBUG <<
"\tThe minimal node size MinNodeSize=" <<
fMinNodeSize <<
" fMinNodeSize="<<
fMinNodeSize<<
"% is translated to an actual number of events = "<<
fMinSize<<
" for the training sample size of " << eventSample.size() <<
Endl;
392 Log() << kDEBUG <<
"\tNote: This number will be taken as absolute minimum in the node, " <<
Endl;
393 Log() << kDEBUG <<
" \tin terms of 'weighted events' and unweighted ones !! " <<
Endl;
397 UInt_t nevents = eventSample.size();
400 if (
fNvars==0)
fNvars = eventSample[0]->GetNVariables();
403 else Log() << kFATAL <<
":<BuildTree> eventsample Size == 0 " <<
Endl;
414 auto f = [
this, &eventSample, &nPartitions](
UInt_t partition = 0){
416 Int_t start = 1.0*partition/nPartitions*eventSample.size();
417 Int_t end = (partition+1.0)/nPartitions*eventSample.size();
421 for(
Int_t iev=start; iev<end; iev++){
426 nodeInfof.
s += weight;
428 nodeInfof.
sub += orgWeight;
431 nodeInfof.
b += weight;
433 nodeInfof.
bub += orgWeight;
437 nodeInfof.
target +=weight*tgt;
438 nodeInfof.
target2+=weight*tgt*tgt;
445 nodeInfof.
xmin[ivar]=val;
446 nodeInfof.
xmax[ivar]=val;
448 if (val < nodeInfof.
xmin[ivar]) nodeInfof.
xmin[ivar]=val;
449 if (val > nodeInfof.
xmax[ivar]) nodeInfof.
xmax[ivar]=val;
459 auto redfunc = [nodeInfoInit](std::vector<BuildNodeInfo>
v) ->
BuildNodeInfo {
return std::accumulate(
v.begin(),
v.end(), nodeInfoInit); };
463 if (nodeInfo.
s+nodeInfo.
b < 0) {
464 Log() << kWARNING <<
" One of the Decision Tree nodes has negative total number of signal or background events. "
465 <<
"(Nsig="<<nodeInfo.
s<<
" Nbkg="<<nodeInfo.
b<<
" Probaby you use a Monte Carlo with negative weights. That should in principle "
466 <<
"be fine as long as on average you end up with something positive. For this you have to make sure that the "
467 <<
"minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize="<<
fMinNodeSize
468 <<
"% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
469 <<
"to allow for reasonable averaging!!!" <<
Endl
470 <<
" If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
471 <<
"with negative weight in the training." <<
Endl;
473 for (
UInt_t i=0;
i<eventSample.size();
i++) {
474 if (eventSample[
i]->GetClass() !=
fSigClass) {
475 nBkg += eventSample[
i]->GetWeight();
476 Log() << kDEBUG <<
"Event "<<
i<<
" has (original) weight: " << eventSample[
i]->GetWeight()/eventSample[
i]->GetBoostWeight()
477 <<
" boostWeight: " << eventSample[
i]->GetBoostWeight() <<
Endl;
480 Log() << kDEBUG <<
" that gives in total: " << nBkg<<
Endl;
526 if (separationGain < std::numeric_limits<double>::epsilon()) {
548 std::vector<const TMVA::Event*> leftSample; leftSample.reserve(nevents);
549 std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);
552 Double_t nRightUnBoosted=0, nLeftUnBoosted=0;
554 for (
UInt_t ie=0; ie< nevents ; ie++) {
556 rightSample.push_back(eventSample[ie]);
557 nRight += eventSample[ie]->GetWeight();
558 nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
561 leftSample.push_back(eventSample[ie]);
562 nLeft += eventSample[ie]->GetWeight();
563 nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
567 if (leftSample.empty() || rightSample.empty()) {
569 Log() << kERROR <<
"<TrainNode> all events went to the same branch" <<
Endl
570 <<
"--- Hence new node == old node ... check" <<
Endl
571 <<
"--- left:" << leftSample.size()
572 <<
" right:" << rightSample.size() <<
Endl
573 <<
" while the separation is thought to be " << separationGain
574 <<
"\n when cutting on variable " << node->
GetSelector()
576 << kFATAL <<
"--- this should never happen, please write a bug report to Helge.Voss@cern.ch" <<
Endl;
643 this->GetRoot()->SetPos(
's');
644 this->GetRoot()->SetDepth(0);
645 this->GetRoot()->SetParentTree(
this);
646 fMinSize = fMinNodeSize/100. * eventSample.size();
648 Log() << kDEBUG <<
"\tThe minimal node size MinNodeSize=" << fMinNodeSize <<
" fMinNodeSize="<<fMinNodeSize<<
"% is translated to an actual number of events = "<< fMinSize<<
" for the training sample size of " << eventSample.size() <<
Endl;
649 Log() << kDEBUG <<
"\tNote: This number will be taken as absolute minimum in the node, " <<
Endl;
650 Log() << kDEBUG <<
" \tin terms of 'weighted events' and unweighted ones !! " <<
Endl;
654 UInt_t nevents = eventSample.size();
657 if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
658 fVariableImportance.resize(fNvars);
660 else Log() <<
kFATAL <<
":<BuildTree> eventsample Size == 0 " <<
Endl;
670 for (
UInt_t ivar=0; ivar<fNvars; ivar++) {
676 for (
UInt_t iev=0; iev<eventSample.size(); iev++) {
677 const TMVA::Event* evt = eventSample[iev];
690 if ( DoRegression() ) {
693 target2+=weight*tgt*tgt;
697 for (
UInt_t ivar=0; ivar<fNvars; ivar++) {
699 if (iev==0)
xmin[ivar]=
xmax[ivar]=val;
700 if (val <
xmin[ivar])
xmin[ivar]=val;
701 if (val >
xmax[ivar])
xmax[ivar]=val;
707 Log() <<
kWARNING <<
" One of the Decision Tree nodes has negative total number of signal or background events. "
708 <<
"(Nsig="<<s<<
" Nbkg="<<
b<<
" Probaby you use a Monte Carlo with negative weights. That should in principle "
709 <<
"be fine as long as on average you end up with something positive. For this you have to make sure that the "
710 <<
"minimul number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize="<<fMinNodeSize
711 <<
"% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
712 <<
"to allow for reasonable averaging!!!" <<
Endl
713 <<
" If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
714 <<
"with negative weight in the training." <<
Endl;
716 for (
UInt_t i=0;
i<eventSample.size();
i++) {
717 if (eventSample[
i]->
GetClass() != fSigClass) {
718 nBkg += eventSample[
i]->GetWeight();
719 Log() <<
kDEBUG <<
"Event "<<
i<<
" has (original) weight: " << eventSample[
i]->GetWeight()/eventSample[
i]->GetBoostWeight()
720 <<
" boostWeight: " << eventSample[
i]->GetBoostWeight() <<
Endl;
733 if (node == this->GetRoot()) {
740 for (
UInt_t ivar=0; ivar<fNvars; ivar++) {
759 if ((eventSample.size() >= 2*fMinSize && s+
b >= 2*fMinSize) && node->
GetDepth() < fMaxDepth
760 && ( ( s!=0 &&
b !=0 && !DoRegression()) || ( (s+
b)!=0 && DoRegression()) ) ) {
763 separationGain = this->TrainNodeFast(eventSample, node);
765 separationGain = this->TrainNodeFull(eventSample, node);
767 if (separationGain < std::numeric_limits<double>::epsilon()) {
771 if (DoRegression()) {
786 if (node->
GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->
GetDepth());
790 std::vector<const TMVA::Event*> leftSample; leftSample.reserve(nevents);
791 std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);
794 Double_t nRightUnBoosted=0, nLeftUnBoosted=0;
796 for (
UInt_t ie=0; ie< nevents ; ie++) {
798 rightSample.push_back(eventSample[ie]);
799 nRight += eventSample[ie]->GetWeight();
800 nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
803 leftSample.push_back(eventSample[ie]);
804 nLeft += eventSample[ie]->GetWeight();
805 nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
810 if (leftSample.empty() || rightSample.empty()) {
812 Log() <<
kERROR <<
"<TrainNode> all events went to the same branch" <<
Endl
813 <<
"--- Hence new node == old node ... check" <<
Endl
814 <<
"--- left:" << leftSample.size()
815 <<
" right:" << rightSample.size() <<
Endl
816 <<
" while the separation is thought to be " << separationGain
817 <<
"\n when cutting on variable " << node->
GetSelector()
819 <<
kFATAL <<
"--- this should never happen, please write a bug report to Helge.Voss@cern.ch" <<
Endl;
823 TMVA::DecisionTreeNode *rightNode =
new TMVA::DecisionTreeNode(node,
'r');
829 TMVA::DecisionTreeNode *leftNode =
new TMVA::DecisionTreeNode(node,
'l');
840 this->BuildTree(rightSample, rightNode);
841 this->BuildTree(leftSample, leftNode );
846 if (DoRegression()) {
867 if (node->
GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->
GetDepth());
882 for (
UInt_t i=0;
i<eventSample.size();
i++) {
925 if (this->
GetRoot()!=NULL) this->
GetRoot()->ClearNodeAndAllDaughters();
949 if (
l->GetNodeType() *
r->GetNodeType() > 0) {
979 Log() << kFATAL <<
"Selected pruning method not yet implemented "
983 if(!tool)
return 0.0;
987 if(validationSample == NULL){
988 Log() << kFATAL <<
"Cannot automate the pruning algorithm without an "
989 <<
"independent validation sample!" <<
Endl;
990 }
else if(validationSample->size() == 0) {
991 Log() << kFATAL <<
"Cannot automate the pruning algorithm with "
992 <<
"independent validation sample of ZERO events!" <<
Endl;
999 Log() << kFATAL <<
"Error pruning tree! Check prune.log for more information."
1019 return pruneStrength;
1031 GetRoot()->ResetValidationData();
1032 for (
UInt_t ievt=0; ievt < validationSample->size(); ievt++) {
1048 Log() << kFATAL <<
"TestPrunedTreeQuality: started with undefined ROOT node" <<
Endl;
1053 if(
n->GetLeft() != NULL &&
n->GetRight() != NULL && !
n->IsTerminal() ) {
1059 Double_t sumw =
n->GetNSValidation() +
n->GetNBValidation();
1060 return n->GetSumTarget2() - 2*
n->GetSumTarget()*
n->GetResponse() + sumw*
n->GetResponse()*
n->GetResponse();
1064 if (
n->GetPurity() > this->GetNodePurityLimit())
1065 return n->GetNBValidation();
1067 return n->GetNSValidation();
1069 else if (
mode == 1 ) {
1071 return (
n->GetPurity() *
n->GetNBValidation() + (1.0 -
n->GetPurity()) *
n->GetNSValidation());
1074 throw std::string(
"Unknown ValidationQualityMode");
1088 if (current == NULL) {
1089 Log() << kFATAL <<
"CheckEventWithPrunedTree: started with undefined ROOT node" <<
Endl;
1092 while(current != NULL) {
1098 if (
e->GetNTargets() > 0) {
1121 for( EventConstList::const_iterator it = validationSample->begin();
1122 it != validationSample->end(); ++it ) {
1123 sumWeights += (*it)->GetWeight();
1136 Log() << kFATAL <<
"CountLeafNodes: started with undefined ROOT node" <<
Endl;
1165 Log() << kFATAL <<
"DescendTree: started with undefined ROOT node" <<
Endl;
1174 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
1178 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
1218 if(node == NULL)
return;
1221 node->
SetAlpha( std::numeric_limits<double>::infinity( ) );
1256 UInt_t nSelectedVars = 0;
1257 while (nSelectedVars < useNvars) {
1262 if (useVariable[ivar] ==
kTRUE) {
1263 mapVariable[nSelectedVars] = ivar;
1268 if (nSelectedVars != useNvars) { std::cout <<
"Bug in TrainNode - GetRandisedVariables()... sorry" << std::endl; std::exit(1);}
1285 nSelS = std::vector< std::vector<Double_t> >(
cNvars);
1286 nSelB = std::vector< std::vector<Double_t> >(
cNvars);
1293 nSelS[ivar] = std::vector<Double_t>(
nBins[ivar], 0);
1294 nSelB[ivar] = std::vector<Double_t>(
nBins[ivar], 0);
1297 target[ivar] = std::vector<Double_t>(
nBins[ivar], 0);
1320 std::vector< std::vector<Double_t> >
nSelS;
1321 std::vector< std::vector<Double_t> >
nSelB;
1336 std::cout <<
"!!! ERROR TrainNodeInfo1+TrainNodeInfo2 failure. cNvars1 != cNvars2." << std::endl;
1384 separationGain[ivar]=-1;
1390 UInt_t nevents = eventSample.size();
1398 std::vector<Double_t> fisherCoeff;
1407 useVariable[ivar] =
kTRUE;
1408 mapVariable[ivar] = ivar;
1424 useVarInFisher[ivar] =
kFALSE;
1425 mapVarInFisher[ivar] = ivar;
1428 std::vector<TMatrixDSym*>* covMatrices;
1431 Log() << kWARNING <<
" in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" <<
Endl;
1443 useVarInFisher[ivar] =
kTRUE;
1444 useVarInFisher[jvar] =
kTRUE;
1455 if (useVarInFisher[ivar] && useVariable[ivar]) {
1456 mapVarInFisher[nFisherVars++]=ivar;
1467 delete [] useVarInFisher;
1468 delete [] mapVarInFisher;
1490 for (
UInt_t ivar=0; ivar<cNvars; ivar++) {
1494 if (
fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') {
1499 cutValues[ivar] =
new Double_t [nBins[ivar]];
1503 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1510 useVariable[ivar]=
kFALSE;
1518 for (
UInt_t iev=0; iev<nevents; iev++) {
1522 result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
1528 for (
UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
1529 cutValues[ivar][ibin]=0;
1543 auto fvarInitCuts = [
this, &useVariable, &cutValues, &invBinWidth, &binWidth, &nBins, &
xmin, &
xmax](
UInt_t ivar = 0){
1545 if ( useVariable[ivar] ) {
1559 invBinWidth[ivar] = 1./binWidth[ivar];
1561 if (
fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') { invBinWidth[ivar] = 1; binWidth[ivar] = 1; }
1569 for (
UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
1570 cutValues[ivar][icut]=
xmin[ivar]+(
Double_t(icut+1))*binWidth[ivar];
1587 if(eventSample.size() >= cNvars*
fNCuts*nPartitions*2)
1592 auto f = [
this, &eventSample, &fisherCoeff, &useVariable, &invBinWidth,
1593 &nBins, &
xmin, &cNvars, &nPartitions](
UInt_t partition = 0){
1595 UInt_t start = 1.0*partition/nPartitions*eventSample.size();
1596 UInt_t end = (partition+1.0)/nPartitions*eventSample.size();
1600 for(
UInt_t iev=start; iev<end; iev++) {
1602 Double_t eventWeight = eventSample[iev]->GetWeight();
1603 if (eventSample[iev]->GetClass() ==
fSigClass) {
1604 nodeInfof.
nTotS+=eventWeight;
1607 nodeInfof.
nTotB+=eventWeight;
1613 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1616 if ( useVariable[ivar] ) {
1618 if (ivar <
fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
1620 eventData = fisherCoeff[
fNvars];
1622 eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
1628 if (eventSample[iev]->GetClass() ==
fSigClass) {
1629 nodeInfof.
nSelS[ivar][iBin]+=eventWeight;
1633 nodeInfof.
nSelB[ivar][iBin]+=eventWeight;
1637 nodeInfof.
target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
1638 nodeInfof.
target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
1650 auto redfunc = [nodeInfoInit](std::vector<TrainNodeInfo>
v) ->
TrainNodeInfo {
return std::accumulate(
v.begin(),
v.end(), nodeInfoInit); };
1659 auto fvarFillNodeInfo = [
this, &nodeInfo, &eventSample, &fisherCoeff, &useVariable, &invBinWidth, &nBins, &
xmin](
UInt_t ivar = 0){
1661 for(
UInt_t iev=0; iev<eventSample.size(); iev++) {
1664 Double_t eventWeight = eventSample[iev]->GetWeight();
1668 if (eventSample[iev]->GetClass() ==
fSigClass) {
1669 nodeInfo.
nTotS+=eventWeight;
1672 nodeInfo.
nTotB+=eventWeight;
1678 if ( useVariable[ivar] ) {
1680 if (ivar <
fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
1682 eventData = fisherCoeff[
fNvars];
1684 eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
1690 if (eventSample[iev]->GetClass() ==
fSigClass) {
1691 nodeInfo.
nSelS[ivar][iBin]+=eventWeight;
1695 nodeInfo.
nSelB[ivar][iBin]+=eventWeight;
1699 nodeInfo.
target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
1700 nodeInfo.
target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
1713 auto fvarCumulative = [&nodeInfo, &useVariable, &nBins,
this, &eventSample](
UInt_t ivar = 0){
1714 if (useVariable[ivar]) {
1715 for (
UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
1716 nodeInfo.
nSelS[ivar][ibin]+=nodeInfo.
nSelS[ivar][ibin-1];
1718 nodeInfo.
nSelB[ivar][ibin]+=nodeInfo.
nSelB[ivar][ibin-1];
1721 nodeInfo.
target[ivar][ibin] +=nodeInfo.
target[ivar][ibin-1] ;
1726 Log() << kFATAL <<
"Helge, you have a bug ....nodeInfo.nSelS_unw..+nodeInfo.nSelB_unw..= "
1728 <<
" while eventsample size = " << eventSample.size()
1731 double lastBins=nodeInfo.
nSelS[ivar][nBins[ivar]-1] +nodeInfo.
nSelB[ivar][nBins[ivar]-1];
1732 double totalSum=nodeInfo.
nTotS+nodeInfo.
nTotB;
1733 if (
TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
1734 Log() << kFATAL <<
"Helge, you have another bug ....nodeInfo.nSelS+nodeInfo.nSelB= "
1736 <<
" while total number of events = " << totalSum
1747 auto fvarMaxSep = [&nodeInfo, &useVariable,
this, &separationGain, &cutIndex, &nBins] (
UInt_t ivar = 0){
1748 if (useVariable[ivar]) {
1750 for (
UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {
1780 sepTmp =
fRegType->GetSeparationGain(nodeInfo.
nSelS[ivar][iBin]+nodeInfo.
nSelB[ivar][iBin],
1783 nodeInfo.
target[ivar][nBins[ivar]-1],nodeInfo.
target2[ivar][nBins[ivar]-1]);
1787 if (separationGain[ivar] < sepTmp) {
1788 separationGain[ivar] = sepTmp;
1789 cutIndex[ivar] = iBin;
1799 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1800 if (useVariable[ivar] ) {
1801 if (separationGainTotal < separationGain[ivar]) {
1802 separationGainTotal = separationGain[ivar];
1822 if (nodeInfo.
nSelS[mxVar][cutIndex[mxVar]]/nodeInfo.
nTotS > nodeInfo.
nSelB[mxVar][cutIndex[mxVar]]/nodeInfo.
nTotB) cutType=
kTRUE;
1827 node->
SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);
1849 separationGainTotal = 0;
1862 delete [] cutValues[
i];
1873 delete [] cutValues;
1878 delete [] useVariable;
1879 delete [] mapVariable;
1881 delete [] separationGain;
1886 delete [] invBinWidth;
1888 return separationGainTotal;
1897 Double_t separationGainTotal = -1, sepTmp;
1902 for (
UInt_t ivar=0; ivar <= fNvars; ivar++) {
1903 separationGain[ivar]=-1;
1910 Int_t nTotS_unWeighted, nTotB_unWeighted;
1911 UInt_t nevents = eventSample.size();
1919 std::vector<Double_t> fisherCoeff;
1922 if (fRandomisedTree) {
1924 GetRandomisedVariables(useVariable,mapVariable,tmp);
1927 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1928 useVariable[ivar] =
kTRUE;
1929 mapVariable[ivar] = ivar;
1933 useVariable[fNvars] =
kFALSE;
1937 if (fUseFisherCuts) {
1938 useVariable[fNvars] =
kTRUE;
1944 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1945 useVarInFisher[ivar] =
kFALSE;
1946 mapVarInFisher[ivar] = ivar;
1949 std::vector<TMatrixDSym*>* covMatrices;
1952 Log() <<
kWARNING <<
" in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" <<
Endl;
1960 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1961 for (
UInt_t jvar=ivar+1; jvar < fNvars; jvar++) {
1962 if ( (
TMath::Abs( (*s)(ivar, jvar)) > fMinLinCorrForFisher) ||
1963 (
TMath::Abs( (*
b)(ivar, jvar)) > fMinLinCorrForFisher) ){
1964 useVarInFisher[ivar] =
kTRUE;
1965 useVarInFisher[jvar] =
kTRUE;
1973 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1976 if (useVarInFisher[ivar] && useVariable[ivar]) {
1977 mapVarInFisher[nFisherVars++]=ivar;
1980 if (fUseExclusiveVars) useVariable[ivar] =
kFALSE;
1985 fisherCoeff = this->GetFisherCoefficients(eventSample, nFisherVars, mapVarInFisher);
1988 delete [] useVarInFisher;
1989 delete [] mapVarInFisher;
1996 if (fUseFisherCuts && fisherOK) cNvars++;
2013 for (
UInt_t ivar=0; ivar<cNvars; ivar++) {
2015 nBins[ivar] = fNCuts+1;
2016 if (ivar < fNvars) {
2017 if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') {
2024 nSelS[ivar] =
new Double_t [nBins[ivar]];
2025 nSelB[ivar] =
new Double_t [nBins[ivar]];
2026 nSelS_unWeighted[ivar] =
new Double_t [nBins[ivar]];
2027 nSelB_unWeighted[ivar] =
new Double_t [nBins[ivar]];
2029 target2[ivar] =
new Double_t [nBins[ivar]];
2030 cutValues[ivar] =
new Double_t [nBins[ivar]];
2039 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2046 useVariable[ivar]=
kFALSE;
2054 for (
UInt_t iev=0; iev<nevents; iev++) {
2057 for (
UInt_t jvar=0; jvar<fNvars; jvar++)
2058 result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
2063 for (
UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
2064 nSelS[ivar][ibin]=0;
2065 nSelB[ivar][ibin]=0;
2066 nSelS_unWeighted[ivar][ibin]=0;
2067 nSelB_unWeighted[ivar][ibin]=0;
2069 target2[ivar][ibin]=0;
2070 cutValues[ivar][ibin]=0;
2077 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2079 if ( useVariable[ivar] ) {
2093 invBinWidth[ivar] = 1./binWidth[ivar];
2094 if (ivar < fNvars) {
2095 if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') { invBinWidth[ivar] = 1; binWidth[ivar] = 1; }
2103 for (
UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
2104 cutValues[ivar][icut]=
xmin[ivar]+(
Double_t(icut+1))*binWidth[ivar];
2112 nTotS_unWeighted=0; nTotB_unWeighted=0;
2113 for (
UInt_t iev=0; iev<nevents; iev++) {
2115 Double_t eventWeight = eventSample[iev]->GetWeight();
2116 if (eventSample[iev]->
GetClass() == fSigClass) {
2118 nTotS_unWeighted++; }
2126 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2129 if ( useVariable[ivar] ) {
2131 if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
2133 eventData = fisherCoeff[fNvars];
2134 for (
UInt_t jvar=0; jvar<fNvars; jvar++)
2135 eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
2141 if (eventSample[iev]->
GetClass() == fSigClass) {
2142 nSelS[ivar][iBin]+=eventWeight;
2143 nSelS_unWeighted[ivar][iBin]++;
2146 nSelB[ivar][iBin]+=eventWeight;
2147 nSelB_unWeighted[ivar][iBin]++;
2149 if (DoRegression()) {
2150 target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
2151 target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
2158 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2159 if (useVariable[ivar]) {
2160 for (
UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
2161 nSelS[ivar][ibin]+=nSelS[ivar][ibin-1];
2162 nSelS_unWeighted[ivar][ibin]+=nSelS_unWeighted[ivar][ibin-1];
2163 nSelB[ivar][ibin]+=nSelB[ivar][ibin-1];
2164 nSelB_unWeighted[ivar][ibin]+=nSelB_unWeighted[ivar][ibin-1];
2165 if (DoRegression()) {
2167 target2[ivar][ibin]+=target2[ivar][ibin-1];
2170 if (nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1] != eventSample.size()) {
2171 Log() <<
kFATAL <<
"Helge, you have a bug ....nSelS_unw..+nSelB_unw..= "
2172 << nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1]
2173 <<
" while eventsample size = " << eventSample.size()
2176 double lastBins=nSelS[ivar][nBins[ivar]-1] +nSelB[ivar][nBins[ivar]-1];
2177 double totalSum=nTotS+nTotB;
2178 if (
TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
2179 Log() <<
kFATAL <<
"Helge, you have another bug ....nSelS+nSelB= "
2181 <<
" while total number of events = " << totalSum
2189 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2190 if (useVariable[ivar]) {
2191 for (
UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {
2203 Double_t sl = nSelS_unWeighted[ivar][iBin];
2204 Double_t bl = nSelB_unWeighted[ivar][iBin];
2216 if ( ((sl+bl)>=fMinSize && (sr+br)>=fMinSize)
2217 && ((slW+blW)>=fMinSize && (srW+brW)>=fMinSize)
2220 if (DoRegression()) {
2221 sepTmp = fRegType->GetSeparationGain(nSelS[ivar][iBin]+nSelB[ivar][iBin],
2222 target[ivar][iBin],target2[ivar][iBin],
2224 target[ivar][nBins[ivar]-1],target2[ivar][nBins[ivar]-1]);
2226 sepTmp = fSepType->GetSeparationGain(nSelS[ivar][iBin], nSelB[ivar][iBin], nTotS, nTotB);
2228 if (separationGain[ivar] < sepTmp) {
2229 separationGain[ivar] = sepTmp;
2230 cutIndex[ivar] = iBin;
2238 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2239 if (useVariable[ivar] ) {
2240 if (separationGainTotal < separationGain[ivar]) {
2241 separationGainTotal = separationGain[ivar];
2248 if (DoRegression()) {
2249 node->
SetSeparationIndex(fRegType->GetSeparationIndex(nTotS+nTotB,
target[0][nBins[mxVar]-1],target2[0][nBins[mxVar]-1]));
2251 if (
almost_equal_double(target2[0][nBins[mxVar]-1]/(nTotS+nTotB),
target[0][nBins[mxVar]-1]/(nTotS+nTotB)*
target[0][nBins[mxVar]-1]/(nTotS+nTotB))) {
2254 node->
SetRMS(
TMath::Sqrt(target2[0][nBins[mxVar]-1]/(nTotS+nTotB) -
target[0][nBins[mxVar]-1]/(nTotS+nTotB)*
target[0][nBins[mxVar]-1]/(nTotS+nTotB)));
2260 if (nSelS[mxVar][cutIndex[mxVar]]/nTotS > nSelB[mxVar][cutIndex[mxVar]]/nTotB) cutType=
kTRUE;
2265 node->
SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);
2268 if (mxVar < (
Int_t) fNvars){
2270 fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
2277 for (
UInt_t ivar=0; ivar<=fNvars; ivar++) {
2281 fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
2287 separationGainTotal = 0;
2306 delete [] nSelS_unWeighted[
i];
2307 delete [] nSelB_unWeighted[
i];
2309 delete [] target2[
i];
2310 delete [] cutValues[
i];
2314 delete [] nSelS_unWeighted;
2315 delete [] nSelB_unWeighted;
2318 delete [] cutValues;
2323 delete [] useVariable;
2324 delete [] mapVariable;
2326 delete [] separationGain;
2331 delete [] invBinWidth;
2333 return separationGainTotal;
2343 std::vector<Double_t> fisherCoeff(
fNvars+1);
2366 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) { sumS[ivar] = sumB[ivar] = 0; }
2368 UInt_t nevents = eventSample.size();
2370 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
2373 const Event * ev = eventSample[ievt];
2378 else sumOfWeightsB += weight;
2381 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
2385 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
2386 (*meanMatx)( ivar, 2 ) = sumS[ivar];
2387 (*meanMatx)( ivar, 0 ) = sumS[ivar]/sumOfWeightsS;
2389 (*meanMatx)( ivar, 2 ) += sumB[ivar];
2390 (*meanMatx)( ivar, 1 ) = sumB[ivar]/sumOfWeightsB;
2393 (*meanMatx)( ivar, 2 ) /= (sumOfWeightsS + sumOfWeightsB);
2405 assert( sumOfWeightsS > 0 && sumOfWeightsB > 0 );
2409 const Int_t nFisherVars2 = nFisherVars*nFisherVars;
2413 memset(sum2Sig,0,nFisherVars2*
sizeof(
Double_t));
2414 memset(sum2Bgd,0,nFisherVars2*
sizeof(
Double_t));
2417 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
2421 const Event* ev = eventSample.at(ievt);
2431 if ( ev->
GetClass() ==
fSigClass ) sum2Sig[k] += ( (xval[
x] - (*meanMatx)(
x, 0))*(xval[
y] - (*meanMatx)(
y, 0)) )*weight;
2432 else sum2Bgd[k] += ( (xval[
x] - (*meanMatx)(
x, 1))*(xval[
y] - (*meanMatx)(
y, 1)) )*weight;
2440 (*with)(
x,
y) = sum2Sig[k]/sumOfWeightsS + sum2Bgd[k]/sumOfWeightsB;
2460 prodSig = ( ((*meanMatx)(
x, 0) - (*meanMatx)(
x, 2))*
2461 ((*meanMatx)(
y, 0) - (*meanMatx)(
y, 2)) );
2462 prodBgd = ( ((*meanMatx)(
x, 1) - (*meanMatx)(
x, 2))*
2463 ((*meanMatx)(
y, 1) - (*meanMatx)(
y, 2)) );
2465 (*betw)(
x,
y) = (sumOfWeightsS*prodSig + sumOfWeightsB*prodBgd) / (sumOfWeightsS + sumOfWeightsB);
2474 (*cov)(
x,
y) = (*with)(
x,
y) + (*betw)(
x,
y);
2489 Log() << kWARNING <<
"FisherCoeff matrix is almost singular with determinant="
2491 <<
" did you use the variables that are linear combinations or highly correlated?"
2495 Log() << kFATAL <<
"FisherCoeff matrix is singular with determinant="
2497 <<
" did you use the variables that are linear combinations?"
2504 Double_t xfact =
TMath::Sqrt( sumOfWeightsS*sumOfWeightsB ) / (sumOfWeightsS + sumOfWeightsB);
2507 std::vector<Double_t> diffMeans( nFisherVars );
2509 for (
UInt_t ivar=0; ivar<=
fNvars; ivar++) fisherCoeff[ivar] = 0;
2510 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
2511 for (
UInt_t jvar=0; jvar<nFisherVars; jvar++) {
2512 Double_t d = (*meanMatx)(jvar, 0) - (*meanMatx)(jvar, 1);
2513 fisherCoeff[mapVarInFisher[ivar]] += invCov(ivar, jvar)*
d;
2517 fisherCoeff[mapVarInFisher[ivar]] *= xfact;
2522 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++){
2523 f0 += fisherCoeff[mapVarInFisher[ivar]]*((*meanMatx)(ivar, 0) + (*meanMatx)(ivar, 1));
2527 fisherCoeff[
fNvars] = f0;
2540 Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
2542 std::vector<TMVA::BDTEventWrapper> bdtEventSample;
2546 std::vector<Double_t> lCutValue(
fNvars, 0.0 );
2547 std::vector<Double_t> lSepGain(
fNvars, -1.0e6 );
2548 std::vector<Char_t> lCutType(
fNvars );
2553 for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
2555 nTotS += (*it)->GetWeight();
2559 nTotB += (*it)->GetWeight();
2565 std::vector<Char_t> useVariable(
fNvars);
2574 Int_t nSelectedVars = 0;
2580 if(useVariable[ivar] ==
Char_t(
kTRUE)) nSelectedVars++;
2588 if(!useVariable[ivar])
continue;
2592 std::sort( bdtEventSample.begin(),bdtEventSample.end() );
2595 Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
2597 std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
2598 for( ; it != it_end; ++it ) {
2600 sigWeightCtr += (**it)->GetWeight();
2602 bkgWeightCtr += (**it)->GetWeight();
2604 it->SetCumulativeWeight(
false,bkgWeightCtr);
2605 it->SetCumulativeWeight(
true,sigWeightCtr);
2611 Double_t separationGain = -1.0, sepTmp = 0.0, cutValue = 0.0, dVal = 0.0, norm = 0.0;
2614 for( it = bdtEventSample.begin(); it != it_end; ++it ) {
2616 if( *(*it) == NULL ) {
2617 Log() << kFATAL <<
"In TrainNodeFull(): have a null event! Where index="
2621 dVal = bdtEventSample[
index].GetVal() - bdtEventSample[
index-1].GetVal();
2627 sepTmp =
fSepType->GetSeparationGain( it->GetCumulativeWeight(
true), it->GetCumulativeWeight(
false), sigWeightCtr, bkgWeightCtr );
2628 if( sepTmp > separationGain ) {
2629 separationGain = sepTmp;
2630 cutValue = it->GetVal() - 0.5*dVal;
2631 Double_t nSelS = it->GetCumulativeWeight(
true);
2632 Double_t nSelB = it->GetCumulativeWeight(
false);
2635 if( nSelS/sigWeightCtr > nSelB/bkgWeightCtr ) cutType =
kTRUE;
2641 lCutType[ivar] =
Char_t(cutType);
2642 lCutValue[ivar] = cutValue;
2643 lSepGain[ivar] = separationGain;
2646 Int_t iVarIndex = -1;
2648 if( lSepGain[ivar] > separationGain ) {
2650 separationGain = lSepGain[ivar];
2655 if(iVarIndex >= 0) {
2660 fVariableImportance[iVarIndex] += separationGain*separationGain * (nTotS+nTotB) * (nTotS+nTotB);
2663 separationGain = 0.0;
2666 return separationGain;
2694 Log() << kFATAL <<
"CheckEvent: started with undefined ROOT node" <<
Endl;
2703 Log() << kFATAL <<
"DT::CheckEvent: inconsistent tree structure" <<
Endl;
2724 Double_t sumsig=0, sumbkg=0, sumtot=0;
2725 for (
UInt_t ievt=0; ievt<eventSample.size(); ievt++) {
2726 if (eventSample[ievt]->GetClass() !=
fSigClass) sumbkg+=eventSample[ievt]->GetWeight();
2727 else sumsig+=eventSample[ievt]->GetWeight();
2728 sumtot+=eventSample[ievt]->GetWeight();
2731 if (sumtot!= (sumsig+sumbkg)){
2732 Log() << kFATAL <<
"<SamplePurity> sumtot != sumsig+sumbkg"
2733 << sumtot <<
" " << sumsig <<
" " << sumbkg <<
Endl;
2735 if (sumtot>0)
return sumsig/(sumsig + sumbkg);
2747 std::vector<Double_t> relativeImportance(
fNvars);
2755 if (
sum > std::numeric_limits<double>::epsilon())
2756 relativeImportance[
i] /=
sum;
2758 relativeImportance[
i] = 0;
2760 return relativeImportance;
2769 if (ivar <
fNvars)
return relativeImportance[ivar];
2771 Log() << kFATAL <<
"<GetVariableImportance>" <<
Endl
2772 <<
"--- ivar = " << ivar <<
" is out of range " <<
Endl;
bool almost_equal_double(double x, double y, int ulp=4)
bool almost_equal_float(float x, float y, int ulp=4)
return
Invalidate stored TCling state for declarations included in transaction ‘T’.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t target
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char mode
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
TMatrixT< Double_t > TMatrixD
static void SetVarIndex(Int_t iVar)
UInt_t CountNodes(Node *n=nullptr)
return the number of nodes in the tree. (make a new count --> takes time)
Node * GetLeftDaughter(Node *n)
get the left daughter node of the current node "n"
Node * GetRightDaughter(Node *n)
get the right daughter node of the current node "n"
BinaryTree(void)
constructor for a yet "empty" tree. Needs to be filled afterwards
void DeleteNode(Node *)
protected, recursive, function used by the class destructor and when Pruning
UInt_t fNNodes
total number of nodes in the tree (counted)
void SetTotalTreeDepth(Int_t depth)
virtual void ReadXML(void *node, UInt_t tmva_Version_Code=262657)
read attributes from XML
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
static Config & Instance()
static function: returns TMVA instance
Class that contains all the data information.
void SetNEvents_unweighted(Float_t nev)
set the number of unweighted events that entered the node (during training), if traininfo defined
void SetNodeType(Int_t t)
set node type: 1 signal node, -1 bkg leaf, 0 intermediate Node
void SetSeparationGain(Float_t sep)
set the separation, or information gained BY this node's selection, if traininfo defined
void SetNBkgEvents(Float_t b)
set the sum of the backgr weights in the node, if traininfo defined
void SetCutType(Bool_t t)
set true: if event variable > cutValue ==> signal , false otherwise
Double_t GetNSValidation() const
return number of signal events from the pruning validation sample, or -1 if traininfo undefined
void IncrementNEvents_unweighted()
increment the number of events that entered the node (during training), if traininfo defined
void SetFisherCoeff(Int_t ivar, Double_t coeff)
set fisher coefficients
void SetNSigEvents_unboosted(Float_t s)
set the sum of the unboosted signal events in the node, if traininfo defined
void SetAlphaMinSubtree(Double_t g)
set the minimum alpha in the tree rooted at this node, if traininfo defined
void IncrementNBkgEvents(Float_t b)
increment the sum of the backgr weights in the node, if traininfo defined
void SetNEvents_unboosted(Float_t nev)
set the number of unboosted events that entered the node (during training), if traininfo defined
Float_t GetNSigEvents(void) const
return the sum of the signal weights in the node, or -1 if traininfo undefined
virtual void SetLeft(Node *l)
void SetTerminal(Bool_t s=kTRUE)
void SetResponse(Float_t r)
set the response of the node (for regression)
void SetSampleMax(UInt_t ivar, Float_t xmax)
set the maximum of variable ivar from the training sample that pass/end up in this node,...
void SetNBValidation(Double_t b)
set number of background events from the pruning validation sample, if traininfo defined
void IncrementNEvents(Float_t nev)
void SetPurity(void)
return the S/(S+B) (purity) for the node REM: even if nodes with purity 0.01 are very PURE background...
void SetSubTreeR(Double_t r)
set the resubstitution estimate, R(T_t), of the tree rooted at this node, if traininfo defined
void AddToSumTarget2(Float_t t2)
add to sum target 2, if traininfo defined
virtual DecisionTreeNode * GetLeft() const
Double_t GetNodeR() const
return the node resubstitution estimate, R(t), for Cost Complexity pruning, or -1 if traininfo undefi...
virtual Bool_t GoesRight(const Event &) const
test event if it descends the tree at this node to the right
void SetNFisherCoeff(Int_t nvars)
Short_t GetSelector() const
return index of variable used for discrimination at this node
void SetNSigEvents(Float_t s)
set the sum of the signal weights in the node, if traininfo defined
Float_t GetResponse(void) const
return the response of the node (for regression)
Float_t GetCutValue(void) const
return the cut value applied at this node
Int_t GetNodeType(void) const
return node type: 1 signal node, -1 bkg leaf, 0 intermediate Node
void IncrementNBkgEvents_unweighted()
increment the sum of the backgr weights in the node, if traininfo defined
void SetNSigEvents_unweighted(Float_t s)
set the sum of the unweighted signal events in the node, if traininfo defined
Double_t GetNBValidation() const
return number of background events from the pruning validation sample, or -1 if traininfo undefined
void SetAlpha(Double_t alpha)
set the critical point alpha, if traininfo defined
void SetSeparationIndex(Float_t sep)
set the chosen index, measure of "purity" (separation between S and B) AT this node,...
virtual void SetRight(Node *r)
void SetRMS(Float_t r)
set the RMS of the response of the node (for regression)
void IncrementNSigEvents_unweighted()
increment the sum of the signal weights in the node, if traininfo defined
void SetNBkgEvents_unboosted(Float_t b)
set the sum of the unboosted backgr events in the node, if traininfo defined
Float_t GetPurity(void) const
return S/(S+B) (purity) at this node (from training)
void IncrementNSigEvents(Float_t s)
increment the sum of the signal weights in the node, if traininfo defined
Float_t GetSampleMax(UInt_t ivar) const
return the maximum of variable ivar from the training sample that pass/end up in this node,...
void SetCutValue(Float_t c)
set the cut value applied at this node
Float_t GetNBkgEvents(void) const
return the sum of the backgr weights in the node, or -1 if traininfo undefined
Float_t GetSampleMin(UInt_t ivar) const
return the minimum of variable ivar from the training sample that pass/end up in this node,...
void SetSampleMin(UInt_t ivar, Float_t xmin)
set the minimum of variable ivar from the training sample that pass/end up in this node,...
void SetSelector(Short_t i)
set index of variable used for discrimination at this node
virtual DecisionTreeNode * GetParent() const
void SetNBkgEvents_unweighted(Float_t b)
set the sum of the unweighted backgr events in the node, if traininfo defined
void SetNSValidation(Double_t s)
set number of signal events from the pruning validation sample, if traininfo defined
void AddToSumTarget(Float_t t)
add to sum target, if traininfo defined
void SetNTerminal(Int_t n)
set number of terminal nodes in the subtree rooted here, if traininfo defined
void SetNEvents(Float_t nev)
set the number of events that entered the node (during training), if traininfo defined
virtual DecisionTreeNode * GetRight() const
Implementation of a Decision Tree.
Bool_t DoRegression() const
Int_t fNNodesBeforePruning
remember this one (in case of pruning, it allows monitoring the before/after)
Double_t fMinSize
min number of events in node
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=nullptr)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
void FillTree(const EventList &eventSample)
fill the existing decision tree structure by filling events in from the top node and see where the...
Double_t fMinNodeSize
min fraction of training events in node
void PruneNode(TMVA::DecisionTreeNode *node)
prune away the subtree below the node
TRandom3 * fMyTrandom
random number generator for randomised trees
Int_t fTreeID
just an ID number given to the tree; makes debugging easier as the tree knows which one it is.
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
Double_t TrainNodeFull(const EventConstList &eventSample, DecisionTreeNode *node)
train a node by finding the single optimal cut for a single variable that best separates signal and b...
EPruneMethod fPruneMethod
method used for pruning
Bool_t fUseSearchTree
cut scan done with binary trees or simple event loop.
virtual DecisionTreeNode * GetRoot() const
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in... (used in gradient boosting)
SeparationBase * fSepType
the separation criteria
UInt_t fMaxDepth
max depth
void GetRandomisedVariables(Bool_t *useVariable, UInt_t *variableMap, UInt_t &nVars)
Int_t fUseNvars
the number of variables used in randomised trees;
void SetParentTreeInNodes(Node *n=nullptr)
descend a tree to find all its leaf nodes, fill max depth reached in the tree at the same time.
void DescendTree(Node *n=nullptr)
descend a tree to find all its leaf nodes
Double_t fPruneStrength
a parameter to set the "amount" of pruning..needs to be adjusted
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657)
re-create a new tree (decision tree or search tree) from XML
UInt_t fSigClass
class which is treated as signal when building the tree
std::vector< const TMVA::Event * > EventConstList
Bool_t fUseFisherCuts
use multivariate splits using the Fisher criterion
Double_t fNodePurityLimit
purity limit to decide whether a node is signal
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
Bool_t fUseExclusiveVars
individual variables already used in the Fisher criterion are no longer analysed individually for node ...
static const Int_t fgRandomSeed
Int_t fNCuts
number of grid points in variable cut scans
UInt_t CleanTree(DecisionTreeNode *node=nullptr)
remove those last splits that result in two leaf nodes that are both of the type (i....
Double_t fMinLinCorrForFisher
the minimum linear correlation between two variables demanded for use in the Fisher criterion in node spl...
UInt_t fNvars
number of variables used to separate S and B
Bool_t fRandomisedTree
choose at each node splitting a random set of variables
virtual ~DecisionTree(void)
destructor
Types::EAnalysisType fAnalysisType
kClassification(=0=false) or kRegression(=1=true)
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
void CheckEventWithPrunedTree(const TMVA::Event *) const
pass a single validation event through a pruned decision tree on the way down the tree,...
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants which allows testing the pruned t...
Double_t fMinSepGain
minimum separation gain required to perform node splitting
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=nullptr, Int_t mode=0) const
return the misclassification rate of a pruned tree a "pruned tree" may have set the variable "IsTermi...
std::vector< Double_t > fVariableImportance
the relative importance of the different variables
Double_t PruneTree(const EventConstList *validationSample=nullptr)
prune (get rid of internal nodes) the Decision tree to avoid overtraining several different pruning m...
void FillEvent(const TMVA::Event &event, TMVA::DecisionTreeNode *node)
fill the existing decision tree structure by filling the event in from the top node and see where the...
DataSetInfo * fDataSetInfo
void ClearTree()
clear the tree nodes (their S/N, Nevents etc), just keep the structure of the tree
Double_t SamplePurity(EventList eventSample)
calculates the purity S/(S+B) of a given event sample
Node * GetNode(ULong_t sequence, UInt_t depth)
retrieve node from the tree.
std::vector< Double_t > GetFisherCoefficients(const EventConstList &eventSample, UInt_t nFisherVars, UInt_t *mapVarInFisher)
calculate the fisher coefficients for the event sample and the variables used
UInt_t CountLeafNodes(TMVA::Node *n=nullptr)
return the number of terminal nodes in the sub-tree below Node n
Double_t TrainNodeFast(const EventConstList &eventSample, DecisionTreeNode *node)
Decide how to split a node using one of the variables that gives the best separation of signal/backgr...
Bool_t fUsePoissonNvars
use "fUseNvars" not as fixed number but as mean of a poisson distr. in each split
RegressionVariance * fRegType
the separation criteria used in Regression
DecisionTree(void)
default constructor using the GiniIndex as separation criterion, no restrictions on minimum number of ...
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
Double_t GetPruneStrength() const
Double_t GetOriginalWeight() const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Float_t GetValueFast(UInt_t ivar) const
Float_t GetTarget(UInt_t itgt) const
auto Map(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Wrap TExecutor::Map functions.
auto MapReduce(F func, ROOT::TSeq< INTEGER > args, R redfunc) -> InvokeResult_t< F, INTEGER >
Wrap TExecutor::MapReduce functions.
unsigned int GetPoolSize() const
Node for the BinarySearch or Decision Trees.
std::vector< DecisionTreeNode * > PruneSequence
the regularization parameter for pruning
Double_t PruneStrength
quality measure for a pruned subtree T of T_max
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Singleton class for Global types used by TMVA.
Double_t Determinant() const override
Return the matrix determinant.
TMatrixT< Element > & Invert(Double_t *det=nullptr)
Invert the matrix and calculate its determinant.
Random number generator class based on M.
TSeq< unsigned int > TSeqU
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
BuildNodeInfo(Int_t fNvars, std::vector< Float_t > &inxmin, std::vector< Float_t > &inxmax)
std::vector< Float_t > xmin
BuildNodeInfo operator+(const BuildNodeInfo &other)
std::vector< Float_t > xmax
BuildNodeInfo(Int_t fNvars, const TMVA::Event *evt)
Double_t nTotB_unWeighted
std::vector< std::vector< Double_t > > target2
std::vector< std::vector< Double_t > > nSelB_unWeighted
std::vector< std::vector< Double_t > > nSelB
Double_t nTotS_unWeighted
std::vector< std::vector< Double_t > > target
std::vector< std::vector< Double_t > > nSelS_unWeighted
TrainNodeInfo operator+(const TrainNodeInfo &other)
std::vector< std::vector< Double_t > > nSelS
TrainNodeInfo(Int_t cNvars_, UInt_t *nBins_)
static uint64_t sum(uint64_t i)