// ULP-based floating point comparison helpers. The machine epsilon is scaled
// to the magnitude of the operands and to the desired precision in ULPs
// (units in the last place); the second test catches subnormal differences.
// (The epsilon-scaled first condition is assumed per the standard
// "almost equal" idiom; only the subnormal fallback survived extraction.)
bool almost_equal_float(float x, float y, int ulp=4){
   return std::abs(x-y) < std::numeric_limits<float>::epsilon() * std::abs(x+y) * ulp
      || std::abs(x-y) < std::numeric_limits<float>::min();
}

bool almost_equal_double(double x, double y, int ulp=4){
   return std::abs(x-y) < std::numeric_limits<double>::epsilon() * std::abs(x+y) * ulp
      || std::abs(x-y) < std::numeric_limits<double>::min();
}
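// Usage note (illustrative, not part of the original file): direct comparison
// of accumulated doubles can fail where the ULP test succeeds, e.g.
// 0.1 + 0.2 == 0.3 is false, while almost_equal_double(0.1 + 0.2, 0.3)
// is true with the default ulp=4.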
fMinLinCorrForFisher (1),
fUseExclusiveVars (kTRUE),
fPruneMethod (kNoPruning),
fNNodesBeforePruning(0),
fNodePurityLimit(0.5),
fAnalysisType (Types::kClassification),
if (sepType == NULL) {

Log() << kWARNING << " You had chosen the training mode using optimal cuts, not\n"
      << " based on a grid of " << fNCuts << " by setting the option NCuts < 0\n"
      << " as this doesn't exist yet, I set it to " << fNCuts << " and use the grid"
      << Endl;

Log() << kFATAL << "SetParentTreeNodes: started with undefined ROOT node" << Endl;
Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;

Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
std::string type("");

dt->ReadXML( node, tmva_Version_Code );
Log() << kDEBUG << "\tThe minimal node size MinNodeSize=" << fMinNodeSize << " fMinNodeSize=" << fMinNodeSize
      << "% is translated to an actual number of events = " << fMinSize
      << " for the training sample size of " << eventSample.size() << Endl;
Log() << kDEBUG << "\tNote: This number will be taken as absolute minimum in the node, " << Endl;
Log() << kDEBUG << " \tin terms of 'weighted events' and unweighted ones !! " << Endl;
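// Sketch of the translation the message above describes (assumed shape, not
// verbatim from the omitted lines): MinNodeSize is given in percent of the
// training sample and converted once into the absolute count fMinSize.
// fMinSize = UInt_t(fMinNodeSize / 100.0 * eventSample.size());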
UInt_t nevents = eventSample.size();

if (nevents > 0) {   // guard assumed from context: the else branch reports an empty sample
   if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
}
else Log() << kFATAL << ":<BuildTree> eventsample Size == 0 " << Endl;
xmin[ivar]=xmax[ivar]=0;

for (UInt_t iev=0; iev<eventSample.size(); iev++) {

   target2+=weight*tgt*tgt;

   if (iev==0) xmin[ivar]=xmax[ivar]=val;
   if (val < xmin[ivar]) xmin[ivar]=val;
   if (val > xmax[ivar]) xmax[ivar]=val;
Log() << kWARNING << " One of the Decision Tree nodes has negative total number of signal or background events. "
      << "(Nsig=" << s << " Nbkg=" << b << " Probably you use a Monte Carlo with negative weights. That should in principle "
      << "be fine as long as on average you end up with something positive. For this you have to make sure that the "
      << "minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize=" << fMinNodeSize
      << "% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
      << "to allow for reasonable averaging!!!" << Endl
      << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
      << "with negative weight in the training." << Endl;
for (UInt_t i=0; i<eventSample.size(); i++) {
   nBkg += eventSample[i]->GetWeight();
   Log() << kDEBUG << "Event " << i << " has (original) weight: "
         << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
         << " boostWeight: " << eventSample[i]->GetBoostWeight() << Endl;
}
Log() << kDEBUG << " that gives in total: " << nBkg << Endl;
std::vector<const TMVA::Event*> leftSample; leftSample.reserve(nevents);
std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);

Double_t nRightUnBoosted=0, nLeftUnBoosted=0;

for (UInt_t ie=0; ie< nevents ; ie++) {
   if (node->GoesRight(*eventSample[ie])) {   // branch condition assumed: GoesRight() decides the side
      rightSample.push_back(eventSample[ie]);
      nRight += eventSample[ie]->GetWeight();
      nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
   }
   else {
      leftSample.push_back(eventSample[ie]);
      nLeft += eventSample[ie]->GetWeight();
      nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
   }
}
if (leftSample.empty() || rightSample.empty()) {

   Log() << kERROR << "<TrainNode> all events went to the same branch" << Endl
         << "--- Hence new node == old node ... check" << Endl
         << "--- left:" << leftSample.size()
         << " right:" << rightSample.size() << Endl
         << " while the separation is thought to be " << separationGain
         << "\n when cutting on variable " << node->GetSelector()
         << kFATAL << "--- this should never happen, please write a bug report to Helge.Voss@cern.ch" << Endl;
for (UInt_t i=0; i<eventSample.size(); i++) {

this->FillEvent(event,static_cast<TMVA::DecisionTreeNode*>(node->GetLeft())) ;
Log() << kFATAL << "Selected pruning method not yet implemented " << Endl;

if(!tool) return 0.0;

if(validationSample == NULL){
   Log() << kFATAL << "Cannot automate the pruning algorithm without an "
         << "independent validation sample!" << Endl;
}
else if(validationSample->size() == 0) {
   Log() << kFATAL << "Cannot automate the pruning algorithm with "
         << "independent validation sample of ZERO events!" << Endl;
Log() << kFATAL << "Error pruning tree! Check prune.log for more information." << Endl;

return pruneStrength;
for (UInt_t ievt=0; ievt < validationSample->size(); ievt++) {

Log() << kFATAL << "TestPrunedTreeQuality: started with undefined ROOT node" << Endl;
else if ( mode == 1 ) {

throw std::string("Unknown ValidationQualityMode");
if (current == NULL) {
   Log() << kFATAL << "CheckEventWithPrunedTree: started with undefined ROOT node" << Endl;

while(current != NULL) {

for( EventConstList::const_iterator it = validationSample->begin();
     it != validationSample->end(); ++it ) {
   sumWeights += (*it)->GetWeight();
Log() << kFATAL << "CountLeafNodes: started with undefined ROOT node" << Endl;

Log() << kFATAL << "DescendTree: started with undefined ROOT node" << Endl;

Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;

Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
if(node == NULL) return;

node->SetAlpha( std::numeric_limits<double>::infinity( ) );

for (UInt_t i =0; i < depth; i++) {
while (nSelectedVars < useNvars) {

   if (useVariable[ivar] == kTRUE) {
      mapVariable[nSelectedVars] = ivar;

if (nSelectedVars != useNvars) { std::cout << "Bug in TrainNode - GetRandisedVariables()... sorry" << std::endl; std::exit(1);}
Double_t separationGainTotal = -1, sepTmp;

separationGain[ivar]=-1;

Int_t nTotS_unWeighted, nTotB_unWeighted;
UInt_t nevents = eventSample.size();

std::vector<Double_t> fisherCoeff;

useVariable[ivar] = kTRUE;
mapVariable[ivar] = ivar;

useVarInFisher[ivar] = kFALSE;
mapVarInFisher[ivar] = ivar;
std::vector<TMatrixDSym*>* covMatrices;

Log() << kWARNING << " in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" << Endl;

useVarInFisher[ivar] = kTRUE;
useVarInFisher[jvar] = kTRUE;

if (useVarInFisher[ivar] && useVariable[ivar]) {
   mapVarInFisher[nFisherVars++]=ivar;

delete [] useVarInFisher;
delete [] mapVarInFisher;
for (UInt_t ivar=0; ivar<cNvars; ivar++) {

   nSelS[ivar]            = new Double_t [nBins[ivar]];
   nSelB[ivar]            = new Double_t [nBins[ivar]];
   nSelS_unWeighted[ivar] = new Double_t [nBins[ivar]];
   nSelB_unWeighted[ivar] = new Double_t [nBins[ivar]];
   target[ivar]           = new Double_t [nBins[ivar]];
   target2[ivar]          = new Double_t [nBins[ivar]];
   cutValues[ivar]        = new Double_t [nBins[ivar]];
for (UInt_t ivar=0; ivar < cNvars; ivar++) {

   useVariable[ivar]=kFALSE;

   for (UInt_t iev=0; iev<nevents; iev++) {

      result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
      if (result > xmax[ivar]) xmax[ivar]=result;
      if (result < xmin[ivar]) xmin[ivar]=result;

   for (UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
      nSelS[ivar][ibin]=0;
      nSelB[ivar][ibin]=0;
      nSelS_unWeighted[ivar][ibin]=0;
      nSelB_unWeighted[ivar][ibin]=0;
      target[ivar][ibin]=0;
      target2[ivar][ibin]=0;
      cutValues[ivar][ibin]=0;
for (UInt_t ivar=0; ivar < cNvars; ivar++) {

   if ( useVariable[ivar] ) {

      binWidth[ivar] = ( xmax[ivar] - xmin[ivar] ) / Double_t(nBins[ivar]);
      invBinWidth[ivar] = 1./binWidth[ivar];

      for (UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
         cutValues[ivar][icut]=xmin[ivar]+(Double_t(icut+1))*binWidth[ivar];
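// The candidate-cut grid is equidistant: cut k sits at xmin + (k+1)*binWidth.
// Caching invBinWidth lets the filling loop below map an event value to its
// bin with a single multiplication, roughly iBin = int(invBinWidth*(val - xmin))
// (bin-index form assumed from the grid definition), instead of one division
// per event and variable.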
nTotS_unWeighted=0; nTotB_unWeighted=0;
for (UInt_t iev=0; iev<nevents; iev++) {

   Double_t eventWeight = eventSample[iev]->GetWeight();

      nTotS_unWeighted++; }

   for (UInt_t ivar=0; ivar < cNvars; ivar++) {

      if ( useVariable[ivar] ) {
         Double_t eventData;   // declaration, else branch and inner loop assumed from context
         if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
         else {   // Fisher pseudo-variable: offset plus linear combination
            eventData = fisherCoeff[fNvars];
            for (UInt_t jvar=0; jvar<fNvars; jvar++)
               eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
         }

         nSelS[ivar][iBin]+=eventWeight;
         nSelS_unWeighted[ivar][iBin]++;

         nSelB[ivar][iBin]+=eventWeight;
         nSelB_unWeighted[ivar][iBin]++;

         target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
         target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar]) {
      for (UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
         nSelS[ivar][ibin]+=nSelS[ivar][ibin-1];
         nSelS_unWeighted[ivar][ibin]+=nSelS_unWeighted[ivar][ibin-1];
         nSelB[ivar][ibin]+=nSelB[ivar][ibin-1];
         nSelB_unWeighted[ivar][ibin]+=nSelB_unWeighted[ivar][ibin-1];

         target[ivar][ibin] +=target[ivar][ibin-1] ;
         target2[ivar][ibin]+=target2[ivar][ibin-1];
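// After this pass each bin holds a running (prefix) sum, so the weighted
// signal/background counts "left of cut k" can be read off in O(1) per
// candidate cut: the whole scan over nBins cuts costs O(nBins) rather than
// O(nBins * nevents).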
if (nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1] != eventSample.size()) {
   Log() << kFATAL << "Helge, you have a bug ....nSelS_unw..+nSelB_unw..= "
         << nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1]
         << " while eventsample size = " << eventSample.size()
         << Endl;   // terminator assumed
}
double lastBins=nSelS[ivar][nBins[ivar]-1] +nSelB[ivar][nBins[ivar]-1];
double totalSum=nTotS+nTotB;
if (TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
   Log() << kFATAL << "Helge, you have another bug ....nSelS+nSelB= " << lastBins
         << " while total number of events = " << totalSum
         << Endl;   // operand and terminator assumed from context
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar]) {
      for (UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {

         Double_t sl = nSelS_unWeighted[ivar][iBin];
         Double_t bl = nSelB_unWeighted[ivar][iBin];

         // regression branch; call shape assumed from the six-argument signature
         // GetSeparationGain(nLeft, targetLeft, target2Left, nTot, targetTot, target2Tot)
         sepTmp = fRegType->GetSeparationGain(nSelS[ivar][iBin]+nSelB[ivar][iBin],
                                              target[ivar][iBin],target2[ivar][iBin],
                                              nTotS+nTotB,
                                              target[ivar][nBins[ivar]-1],target2[ivar][nBins[ivar]-1]);

         if (separationGain[ivar] < sepTmp) {
            separationGain[ivar] = sepTmp;
            cutIndex[ivar] = iBin;
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar] ) {
      if (separationGainTotal < separationGain[ivar]) {
         separationGainTotal = separationGain[ivar];

node->SetResponse(target[0][nBins[mxVar]-1]/(nTotS+nTotB));
if (almost_equal_double(target2[0][nBins[mxVar]-1]/(nTotS+nTotB), target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB))) {

node->SetRMS(TMath::Sqrt(target2[0][nBins[mxVar]-1]/(nTotS+nTotB) - target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB)));
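// The node RMS for regression comes from the two accumulated moments:
// RMS = sqrt(<t^2> - <t>^2), with <t> = sum(w*t)/sum(w) and <t^2> = sum(w*t^2)/sum(w).
// The almost_equal_double() guard above catches the case where cancellation
// pushes the variance to numerically zero (or slightly negative).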
if (nSelS[mxVar][cutIndex[mxVar]]/nTotS > nSelB[mxVar][cutIndex[mxVar]]/nTotB) cutType=kTRUE;

node->SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);

fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;

fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;

separationGainTotal = 0;
for (UInt_t i=0; i<cNvars; i++) {

   delete [] nSelS_unWeighted[i];
   delete [] nSelB_unWeighted[i];
   delete [] target[i];
   delete [] target2[i];
   delete [] cutValues[i];

delete [] nSelS_unWeighted;
delete [] nSelB_unWeighted;

delete [] cutValues;

delete [] useVariable;
delete [] mapVariable;

delete [] separationGain;

delete [] invBinWidth;

return separationGainTotal;
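// Illustrative sketch (not from this file) of the separation-gain bookkeeping
// the scan above relies on, using the Gini index as the separation measure,
// one common choice behind fSepType; all names below are hypothetical. The
// gain of a cut is the parent impurity minus what remains in the daughters.
static double giniIndex(double s, double b)
{
   if (s + b <= 0) return 0;
   double p = s / (s + b);
   return p * (1 - p) * (s + b);                   // impurity weighted by node size
}
static double giniSeparationGain(double sLeft, double bLeft, double sTot, double bTot)
{
   return giniIndex(sTot, bTot)
        - giniIndex(sLeft, bLeft)
        - giniIndex(sTot - sLeft, bTot - bLeft);   // larger means a better split
}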
std::vector<Double_t> fisherCoeff(fNvars+1);

for (UInt_t ivar=0; ivar<nFisherVars; ivar++) { sumS[ivar] = sumB[ivar] = 0; }

UInt_t nevents = eventSample.size();

for (UInt_t ievt=0; ievt<nevents; ievt++) {

   const Event * ev = eventSample[ievt];

   else sumOfWeightsB += weight;
for (UInt_t ivar=0; ivar<nFisherVars; ivar++) {
   sum[ivar] += ev->GetValueFast( mapVarInFisher[ivar] )*weight;

for (UInt_t ivar=0; ivar<nFisherVars; ivar++) {
   (*meanMatx)( ivar, 2 ) = sumS[ivar];
   (*meanMatx)( ivar, 0 ) = sumS[ivar]/sumOfWeightsS;

   (*meanMatx)( ivar, 2 ) += sumB[ivar];
   (*meanMatx)( ivar, 1 ) = sumB[ivar]/sumOfWeightsB;

   (*meanMatx)( ivar, 2 ) /= (sumOfWeightsS + sumOfWeightsB);

assert( sumOfWeightsS > 0 && sumOfWeightsB > 0 );
const Int_t nFisherVars2 = nFisherVars*nFisherVars;

memset(sum2Sig,0,nFisherVars2*sizeof(Double_t));
memset(sum2Bgd,0,nFisherVars2*sizeof(Double_t));
for (UInt_t ievt=0; ievt<nevents; ievt++) {

   const Event* ev = eventSample.at(ievt);

   if ( ev->GetClass() == fSigClass ) sum2Sig[k] += ( (xval[x] - (*meanMatx)(x, 0))*(xval[y] - (*meanMatx)(y, 0)) )*weight;
   else                               sum2Bgd[k] += ( (xval[x] - (*meanMatx)(x, 1))*(xval[y] - (*meanMatx)(y, 1)) )*weight;

(*with)(x, y) = sum2Sig[k]/sumOfWeightsS + sum2Bgd[k]/sumOfWeightsB;
prodSig = ( ((*meanMatx)(x, 0) - (*meanMatx)(x, 2))*
            ((*meanMatx)(y, 0) - (*meanMatx)(y, 2)) );
prodBgd = ( ((*meanMatx)(x, 1) - (*meanMatx)(x, 2))*
            ((*meanMatx)(y, 1) - (*meanMatx)(y, 2)) );

(*betw)(x, y) = (sumOfWeightsS*prodSig + sumOfWeightsB*prodBgd) / (sumOfWeightsS + sumOfWeightsB);

(*cov)(x, y) = (*with)(x, y) + (*betw)(x, y);
Log() << kWARNING << "FisherCoeff matrix is almost singular with determinant="
      << " did you use the variables that are linear combinations or highly correlated?" << Endl;

Log() << kFATAL << "FisherCoeff matrix is singular with determinant="
      << " did you use the variables that are linear combinations?" << Endl;

Double_t xfact = TMath::Sqrt( sumOfWeightsS*sumOfWeightsB ) / (sumOfWeightsS + sumOfWeightsB);
std::vector<Double_t> diffMeans( nFisherVars );

for (UInt_t ivar=0; ivar<=fNvars; ivar++) fisherCoeff[ivar] = 0;
for (UInt_t ivar=0; ivar<nFisherVars; ivar++) {
   for (UInt_t jvar=0; jvar<nFisherVars; jvar++) {
      Double_t d = (*meanMatx)(jvar, 0) - (*meanMatx)(jvar, 1);
      fisherCoeff[mapVarInFisher[ivar]] += invCov(ivar, jvar)*d;
   }
   fisherCoeff[mapVarInFisher[ivar]] *= xfact;
}

Double_t f0 = 0.0;   // accumulator declaration assumed from context
for (UInt_t ivar=0; ivar<nFisherVars; ivar++){
   f0 += fisherCoeff[mapVarInFisher[ivar]]*((*meanMatx)(ivar, 0) + (*meanMatx)(ivar, 1));
}
f0 /= -2.0;   // assumed from the omitted lines: centre the discriminant between the class means

fisherCoeff[fNvars] = f0;
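// This is the classic Fisher discriminant: coefficients c = W^{-1}(mu_S - mu_B),
// where invCov is the inverted within-class covariance and columns 0/1 of
// meanMatx hold the signal/background means, scaled by
// xfact = sqrt(wS*wB)/(wS+wB). The extra slot fisherCoeff[fNvars] carries the
// offset f0, so the pseudo-variable f0 + sum_i c_i x_i is centred between the
// two class means.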
Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;

std::vector<TMVA::BDTEventWrapper> bdtEventSample;

std::vector<Double_t> lCutValue( fNvars, 0.0 );
std::vector<Double_t> lSepGain( fNvars, -1.0e6 );
std::vector<Char_t> lCutType( fNvars );

for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {

   nTotS += (*it)->GetWeight();

   nTotB += (*it)->GetWeight();

std::vector<Char_t> useVariable( fNvars );
Int_t nSelectedVars = 0;

if(useVariable[ivar] == Char_t(kTRUE)) nSelectedVars++;

if(!useVariable[ivar]) continue;
std::sort( bdtEventSample.begin(),bdtEventSample.end() );

Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
for( ; it != it_end; ++it ) {

   sigWeightCtr += (**it)->GetWeight();

   bkgWeightCtr += (**it)->GetWeight();

   it->SetCumulativeWeight(false,bkgWeightCtr);
   it->SetCumulativeWeight(true,sigWeightCtr);
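// Sorting the wrapped events by the current variable and storing running
// signal/background weight sums means every gap between adjacent event values
// is a candidate cut whose left-side sums are already available: the full
// search costs O(n log n) for the sort plus O(n) for the scan, instead of
// re-counting the weights for every candidate cut.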
Double_t separationGain = -1.0, sepTmp = 0.0, cutValue = 0.0, dVal = 0.0, norm = 0.0;

for( it = bdtEventSample.begin(); it != it_end; ++it ) {
   if( index == 0 ) { ++index; continue; }
   if( *(*it) == NULL ) {
      Log() << kFATAL << "In TrainNodeFull(): have a null event! Where index="
            << index << ", and parent node=" << node->GetParent() << Endl;
dVal = bdtEventSample[index].GetVal() - bdtEventSample[index-1].GetVal();
norm = TMath::Abs(bdtEventSample[index].GetVal() + bdtEventSample[index-1].GetVal());

if( index >= fMinSize && (nTotS_unWeighted + nTotB_unWeighted) - index >= fMinSize && TMath::Abs(dVal/(0.5*norm + 1)) > fPMin ) {
   sepTmp = fSepType->GetSeparationGain( it->GetCumulativeWeight(true), it->GetCumulativeWeight(false), sigWeightCtr, bkgWeightCtr );
   if( sepTmp > separationGain ) {
      separationGain = sepTmp;
      cutValue = it->GetVal() - 0.5*dVal;
      Double_t nSelS = it->GetCumulativeWeight(true);
      Double_t nSelB = it->GetCumulativeWeight(false);

      if( nSelS/sigWeightCtr > nSelB/bkgWeightCtr ) cutType = kTRUE;

lCutType[ivar] = Char_t(cutType);
lCutValue[ivar] = cutValue;
lSepGain[ivar] = separationGain;
Int_t iVarIndex = -1;

if( lSepGain[ivar] > separationGain ) {

   separationGain = lSepGain[ivar];

if(iVarIndex >= 0) {

   fVariableImportance[iVarIndex] += separationGain*separationGain * (nTotS+nTotB) * (nTotS+nTotB);

separationGain = 0.0;

return separationGain;
Log() << kFATAL << "CheckEvent: started with undefined ROOT node" << Endl;

Log() << kFATAL << "DT::CheckEvent: inconsistent tree structure" << Endl;
Double_t sumsig=0, sumbkg=0, sumtot=0;
for (UInt_t ievt=0; ievt<eventSample.size(); ievt++) {
   if (eventSample[ievt]->GetClass() != fSigClass) sumbkg+=eventSample[ievt]->GetWeight();
   else sumsig+=eventSample[ievt]->GetWeight();
   sumtot+=eventSample[ievt]->GetWeight();

if (sumtot!= (sumsig+sumbkg)){
   Log() << kFATAL << "<SamplePurity> sumtot != sumsig+sumbkg"
         << sumtot << " " << sumsig << " " << sumbkg << Endl;

if (sumtot>0) return sumsig/(sumsig + sumbkg);
std::vector<Double_t> relativeImportance(fNvars);

relativeImportance[i] /= sum;

relativeImportance[i] = 0;

return relativeImportance;

if (ivar < fNvars) return relativeImportance[ivar];

Log() << kFATAL << "<GetVariableImportance>" << Endl
      << "--- ivar = " << ivar << " is out of range " << Endl;
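// The fragments above normalise fVariableImportance: each entry is divided by
// the summed importance (zeroed when the sum is empty), so the relative
// importances of all variables add up to 1, and the single-variable accessor
// range-checks its index before returning one entry.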