// TMVA::MethodPDEFoam (excerpts from MethodPDEFoam.cxx).
//
// Initializer lists of the two constructors: both set the same defaults for
// the foam and regression members.
   , fMultiTargetRegression(kFALSE)
   , fKernelEstimator(NULL)
   , fTargetSelectionStr("Mean")
   , fTargetSelection(kMean)
   , fFillFoamWithOrigWeights(kFALSE)
   , fDTSeparation(kFoam)
   // ...
   , fMultiTargetRegression(kFALSE)
   , fKernelEstimator(NULL)
   , fTargetSelectionStr("Mean")
   , fTargetSelection(kMean)
   , fFillFoamWithOrigWeights(kFALSE)
   , fDTSeparation(kFoam)
// Init(): default initialization called by all constructors.
   fnCells = fnActiveCells*2-1; // a full binary cell tree with A active (leaf)
                                // cells has A-1 internal cells, i.e. 2A-1 in total
   // ...
   fFillFoamWithOrigWeights = kFALSE;
   // ...
   fDTSeparation = kFoam;
   // ...
   fKernelEstimator = NULL;
   fTargetSelection = kMean;
   // ...
   fMultiTargetRegression = kFALSE;
   // ...
   if (fUseYesNoCell)
      SetSignalReferenceCut( 0.0 ); // MVA output is -1 or +1
   else
      SetSignalReferenceCut( 0.5 ); // MVA output is in [0, 1]
// DeclareOptions(): declare the booking options of the PDEFoam method.
   DeclareOptionRef( fSigBgSeparated = kFALSE, "SigBgSeparate", "Separate foams for signal and background" );
   DeclareOptionRef( fFrac = 0.001, "TailCut", "Fraction of outlier events that are excluded from the foam in each dimension" );
   DeclareOptionRef( fVolFrac = 1./15., "VolFrac", "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
   DeclareOptionRef( fnActiveCells = 500, "nActiveCells", "Maximum number of active cells to be created by the foam");
   DeclareOptionRef( fnSampl = 2000, "nSampl", "Number of generated MC events per cell");
   DeclareOptionRef( fnBin = 5, "nBin", "Number of bins in edge histograms");
   DeclareOptionRef( fCompress = kTRUE, "Compress", "Compress foam output file");
   DeclareOptionRef( fMultiTargetRegression = kFALSE, "MultiTargetRegression", "Do regression with multiple targets");
   DeclareOptionRef( fNmin = 100, "Nmin", "Number of events in cell required to split cell");
   DeclareOptionRef( fMaxDepth = 0, "MaxDepth", "Maximum depth of cell tree (0=unlimited)");
   DeclareOptionRef( fFillFoamWithOrigWeights = kFALSE, "FillFoamWithOrigWeights", "Fill foam with original or boost weights");
   DeclareOptionRef( fUseYesNoCell = kFALSE, "UseYesNoCell", "Return -1 or 1 for bkg or signal like events");
   DeclareOptionRef( fDTLogic = "None", "DTLogic", "Use decision tree algorithm to split cells");
   AddPreDefVal(TString("None"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("CrossEntropy"));
   AddPreDefVal(TString("GiniIndexWithLaplace"));
   AddPreDefVal(TString("SdivSqrtSplusB"));

   DeclareOptionRef( fKernelStr = "None", "Kernel", "Kernel type used");
   AddPreDefVal(TString("None"));
   AddPreDefVal(TString("Gauss"));
   AddPreDefVal(TString("LinNeighbors"));
   DeclareOptionRef( fTargetSelectionStr = "Mean", "TargetSelection", "Target selection method");
   AddPreDefVal(TString("Mean"));
   AddPreDefVal(TString("Mpv"));

// DeclareCompatibilityOptions(): options kept only so that old weight files
// can still be read.
   DeclareOptionRef(fCutNmin = kTRUE, "CutNmin", "Requirement for minimal number of events in cell");
   DeclareOptionRef(fPeekMax = kTRUE, "PeekMax", "Peek cell with max. loss for the next split");
// ProcessOptions(): validate and translate the user options.
   if (!(fFrac>=0. && fFrac<=1.)) {
      Log() << kWARNING << "TailCut not in [0.,1.] ==> using 0.001 instead" << Endl;
      // ...
   }

   if (fnActiveCells < 1) {
      Log() << kWARNING << "invalid number of active cells specified: "
            << fnActiveCells << "; setting nActiveCells=2" << Endl;
      // ...
   }
   fnCells = fnActiveCells*2-1;

   // decision-tree logic requires a single unified foam
   if (fSigBgSeparated && fDTLogic != "None") {
      Log() << kFATAL << "Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl;
   }

   // translate the DTLogic string into the separation enum
   if (fDTLogic == "None")
      fDTSeparation = kFoam;
   else if (fDTLogic == "GiniIndex")
      fDTSeparation = kGiniIndex;
   else if (fDTLogic == "MisClassificationError")
      fDTSeparation = kMisClassificationError;
   else if (fDTLogic == "CrossEntropy")
      fDTSeparation = kCrossEntropy;
   else if (fDTLogic == "GiniIndexWithLaplace")
      fDTSeparation = kGiniIndexWithLaplace;
   else if (fDTLogic == "SdivSqrtSplusB")
      fDTSeparation = kSdivSqrtSplusB;
   else {
      Log() << kWARNING << "Unknown separation type: " << fDTLogic
            << ", setting to None" << Endl;
      // ...
      fDTSeparation = kFoam;
   }

   if      (fKernelStr == "None" )        fKernel = kNone;
   else if (fKernelStr == "Gauss" )       fKernel = kGaus;
   else if (fKernelStr == "LinNeighbors") fKernel = kLinN;

   if (fTargetSelectionStr == "Mean" ) fTargetSelection = kMean;
   else                                fTargetSelection = kMpv;

   // sanity check: multiple targets require MultiTargetRegression=T
   if (DoRegression() && Data()->GetNTargets() > 1 && !fMultiTargetRegression) {
      Log() << kWARNING << "Warning: number of targets > 1"
            << " and MultiTargetRegression=F was set, this makes no sense!"
            << " --> I'm setting MultiTargetRegression=T" << Endl;
      fMultiTargetRegression = kTRUE;
   }
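For orientation, here is a minimal booking sketch that exercises the options
declared and validated above. It is not part of MethodPDEFoam.cxx: the output
file name, the input trees and the variable names are illustrative
assumptions, while TMVA::Factory, TMVA::DataLoader and the option keys are the
standard TMVA API.

#include "TFile.h"
#include "TTree.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

void BookPDEFoamExample(TTree* sigTree, TTree* bkgTree)  // assumed input trees
{
   TFile* outFile = TFile::Open("TMVA_PDEFoam.root", "RECREATE");
   TMVA::Factory factory("TMVAClassification", outFile, "!V:AnalysisType=Classification");
   TMVA::DataLoader loader("dataset");
   loader.AddVariable("var1", 'F');   // must match the training ntuple
   loader.AddVariable("var2", 'F');
   loader.AddSignalTree(sigTree);
   loader.AddBackgroundTree(bkgTree);
   // default-like option string, cf. DeclareOptions()/ProcessOptions() above
   factory.BookMethod(&loader, TMVA::Types::kPDEFoam, "PDEFoam",
                      "SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:"
                      "nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:"
                      "Kernel=None:Compress=T");
   factory.TrainAllMethods();
   outFile->Close();
}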
   if (fKernelEstimator != NULL)
      delete fKernelEstimator;
   // ...

// CalcXminXmax(): determine the foam range [fXmin, fXmax] in every dimension,
// such that a fraction fFrac (TailCut) of the events lies outside.
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets();   // targets count as extra foam dimensions
   // ...
   Log() << kDEBUG << "Number of training events: " << Data()->GetNTrainingEvents() << Endl;
   for (Long64_t i=0; i<(GetNEvents()); i++) {
      // ...
      if (fMultiTargetRegression) {
         // ...
         val = ev->GetValue(dim);          // dim < vDim: input variable
         // ...
         val = ev->GetTarget(dim - vDim);  // dim >= vDim: regression target
         // ...
      }
      // ...
      val = ev->GetValue(dim);
      // ...
   }
   // ...
   for (Long64_t i=0; i<GetNEvents(); i++) {
      // ...
      if (fMultiTargetRegression) {
         // ...
      }
      // ...
   }
   // ...
   fXmin.push_back(xmin[dim]);
   fXmax.push_back(xmax[dim]);
// Train(): train the PDE-Foam, depending on the set options.
   Log() << kVERBOSE << "Calculate Xmin and Xmax for every dimension" << Endl;
   // ...
   if (DoRegression()) {
      if (fMultiTargetRegression)
         TrainMultiTargetRegression();
      else
         TrainMonoTargetRegression();
   }
   else {
      if (DoMulticlass())
         TrainMultiClassification();
      else {
         if (DataInfo().GetNormalization() != "EQUALNUMEVENTS" ) {
            Log() << kHEADER << "NormMode=" << DataInfo().GetNormalization()
                  << " chosen. Note that only NormMode=EqualNumEvents"
                  << " ensures that Discriminant values correspond to"
                  << " signal probabilities." << Endl;
         }
         Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl;
         Log() << kDEBUG << "N_bg for training events: " << Data()->GetNEvtBkgdTrain() << Endl;
         Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl;
         if (fSigBgSeparated)
            TrainSeparatedClassification();
         else
            TrainUnifiedClassification();
      }
   }

   // delete the binary search trees to save memory
   for (UInt_t i=0; i<fFoam.size(); i++) {
      // ...
      fFoam.at(i)->DeleteBinarySearchTree();
   }
486 for(
int i=0; i<2; i++) {
488 fFoam.push_back( InitFoam(
foamcaption[i], kSeparate) );
490 Log() << kVERBOSE <<
"Filling binary search tree of " <<
foamcaption[i]
491 <<
" with events" <<
Endl;
493 for (
Long64_t k=0; k<GetNEvents(); ++k) {
495 if ((i==0 && DataInfo().IsSignal(
ev)) || (i==1 && !DataInfo().IsSignal(
ev)))
496 if (!(IgnoreEventsWithNegWeightsInTraining() &&
ev->GetWeight()<=0))
497 fFoam.back()->FillBinarySearchTree(
ev);
501 fFoam.back()->Create();
503 Log() << kVERBOSE <<
"Filling foam cells with events" <<
Endl;
505 for (
Long64_t k=0; k<GetNEvents(); ++k) {
507 Float_t weight = fFillFoamWithOrigWeights ?
ev->GetOriginalWeight() :
ev->GetWeight();
508 if ((i==0 && DataInfo().IsSignal(
ev)) || (i==1 && !DataInfo().IsSignal(
ev)))
509 if (!(IgnoreEventsWithNegWeightsInTraining() &&
ev->GetWeight()<=0))
510 fFoam.back()->FillFoamCells(
ev, weight);
521 fFoam.push_back( InitFoam(
"DiscrFoam", kDiscr, fSignalClass) );
523 Log() << kVERBOSE <<
"Filling binary search tree of discriminator foam with events" <<
Endl;
525 for (
Long64_t k=0; k<GetNEvents(); ++k) {
527 if (!(IgnoreEventsWithNegWeightsInTraining() &&
ev->GetWeight()<=0))
528 fFoam.back()->FillBinarySearchTree(
ev);
531 Log() << kINFO <<
"Build up discriminator foam" <<
Endl;
532 fFoam.back()->Create();
534 Log() << kVERBOSE <<
"Filling foam cells with events" <<
Endl;
536 for (
Long64_t k=0; k<GetNEvents(); ++k) {
538 Float_t weight = fFillFoamWithOrigWeights ?
ev->GetOriginalWeight() :
ev->GetWeight();
539 if (!(IgnoreEventsWithNegWeightsInTraining() &&
ev->GetWeight()<=0))
540 fFoam.back()->FillFoamCells(
ev, weight);
543 Log() << kVERBOSE <<
"Calculate cell discriminator"<<
Endl;
545 fFoam.back()->Finalize();
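All Train*() variants in this file follow the same foam life cycle, summarised
here as a reading aid (pseudocode, not verbatim source):

   foam = InitFoam(name, type);      // set nCells, nBin, VolFrac, Xmin/Xmax, ...
   foam->FillBinarySearchTree(ev);   // per event: input for the density estimate
   foam->Create();                   // grow the cell tree (the actual foam build-up)
   foam->FillFoamCells(ev, weight);  // per event: fill the cell contents
   foam->Finalize();                 // compute per-cell discriminant / average target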
// TrainMultiClassification(): create one unified foam per class (the cells of
// foam i store the discriminant for class i).
   for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
      // ...
      Log() << kVERBOSE << "Filling binary search tree of multiclass foam "
            << iClass << " with events" << Endl;
      // ...
      for (Long64_t k=0; k<GetNEvents(); ++k) {
         const Event* ev = GetEvent(k);
         if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
            fFoam.back()->FillBinarySearchTree(ev);
      }

      Log() << kINFO << "Build up multiclass foam " << iClass << Endl;
      fFoam.back()->Create();

      Log() << kVERBOSE << "Filling foam cells with events" << Endl;
      // ...
      for (Long64_t k=0; k<GetNEvents(); ++k) {
         const Event* ev = GetEvent(k);
         Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
         if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
            fFoam.back()->FillFoamCells(ev, weight);
      }

      Log() << kVERBOSE << "Calculate cell discriminator" << Endl;
      // ...
      fFoam.back()->Finalize();
   }
// TrainMonoTargetRegression(): one foam whose cells contain the average of
// target 0.
   if (Data()->GetNTargets() != 1) {
      Log() << kFATAL << "Can't do mono-target regression with "
            << Data()->GetNTargets() << " targets!" << Endl;
   }
   // ...
   Log() << kDEBUG << "MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl;
   // ...
   fFoam.push_back( InitFoam("MonoTargetRegressionFoam", kMonoTarget) );

   Log() << kVERBOSE << "Filling binary search tree with events" << Endl;
   // ...
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      const Event* ev = GetEvent(k);
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillBinarySearchTree(ev);
   }

   Log() << kINFO << "Build mono target regression foam" << Endl;
   fFoam.back()->Create();

   Log() << kVERBOSE << "Filling foam cells with events" << Endl;
   // ...
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      const Event* ev = GetEvent(k);
      Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillFoamCells(ev, weight);
   }

   Log() << kVERBOSE << "Calculate average cell targets" << Endl;
   // ...
   fFoam.back()->Finalize();
// TrainMultiTargetRegression(): one event-density foam spanning the input
// variables and the targets.
   Log() << kDEBUG << "Number of variables: " << Data()->GetNVariables() << Endl;
   Log() << kDEBUG << "Number of Targets:   " << Data()->GetNTargets() << Endl;
   Log() << kDEBUG << "Dimension of foam:   " << Data()->GetNVariables()+Data()->GetNTargets() << Endl;
   if (fKernel == kLinN)
      Log() << kFATAL << "LinNeighbors kernel currently not supported"
            << " for multi target regression" << Endl;
   // ...
   fFoam.push_back( InitFoam("MultiTargetRegressionFoam", kMultiTarget) );

   Log() << kVERBOSE << "Filling binary search tree of multi target regression foam with events"
         << Endl;
   // ...
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      // ...
      // copy the targets; they are appended to the event variables below
      std::vector<Float_t> targets(ev->GetTargets());
      // ...
      ev->GetTargets().clear();
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillBinarySearchTree(ev);
   }

   Log() << kINFO << "Build multi target regression foam" << Endl;
   fFoam.back()->Create();

   Log() << kVERBOSE << "Filling foam cells with events" << Endl;
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      // ...
      std::vector<Float_t> targets = ev->GetTargets();
      // ...
      Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
      // ...
      ev->GetTargets().clear();
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillFoamCells(ev, weight);
   }
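Note on the two loops above: for multi-target regression the targets are
treated as additional foam dimensions (the foam spans
Data()->GetNVariables() + Data()->GetNTargets() dimensions, cf. InitFoam()
below). The elided lines presumably append the copied targets to the event's
value vector before the targets are cleared and the event is handed to the
foam.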
// GetMvaValue(): classifier response for the current event.
   if (fSigBgSeparated) {
      std::vector<Float_t> xvec = ev->GetValues();
      // ...
      density_sig = fFoam.at(0)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
      density_bg  = fFoam.at(1)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
      // ...
   }
   else {
      // unified foam: the cell already stores the discriminant
      discr = fFoam.at(0)->GetCellValue(ev->GetValues(), kValue, fKernelEstimator);
   }
   // ...
   if (fUseYesNoCell)
      return (discr < 0.5 ? -1 : 1);
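The lines elided from GetMvaValue() are sketched here under the assumption
that the separated-foam branch uses the PDE-RS style discriminant described in
the help text below; this is a reading aid, not verbatim source:

   // sketch (assumed): combine the two densities into a discriminant
   //   discr = density_sig / (density_sig + density_bg)
   // with a neutral fallback (0.5) when both densities vanish; the unified
   // foam instead returns the cell's stored discriminant directly. With
   // UseYesNoCell=T the value is mapped to a hard decision (discr < 0.5 -> -1,
   // else +1), otherwise discr itself is returned in [0, 1].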
// CalculateMVAError(): error estimate on the MVA value.
   if (fSigBgSeparated) {
      const std::vector<Float_t>& xvec = ev->GetValues();
      // ...
      const Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kValue, fKernelEstimator);
      const Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kValue, fKernelEstimator);
      // ...
   }
   // ...
   mvaError = fFoam.at(0)->GetCellValue(ev->GetValues(), kValueError, fKernelEstimator);
// GetMulticlassValues(): multiclass MVA response of the PDEFoam classifier.
   std::vector<Float_t> xvec = ev->GetValues();
   // ...
   if (fMulticlassReturnVal == NULL)
      fMulticlassReturnVal = new std::vector<Float_t>();
   fMulticlassReturnVal->clear();
   fMulticlassReturnVal->reserve(DataInfo().GetNClasses());

   std::vector<Float_t> temp;   // foam response for each class
   // ...
   temp.push_back(fFoam.at(iClass)->GetCellValue(xvec, kValue, fKernelEstimator));
   // ...
   fMulticlassReturnVal->push_back(1.0 / (1.0 + norm));
   // ...
   return *fMulticlassReturnVal;
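A short sketch of the normalisation used above. Assuming the elided loop
accumulates norm = sum over j != i of temp[j]/temp[i] for class i, the value
1/(1+norm) equals temp[i] / (temp[0] + ... + temp[N-1]), i.e. each class's
foam response normalised to a probability-like number. The helper below is
hypothetical, not part of MethodPDEFoam:

#include <cstddef>
#include <vector>

std::vector<float> NormalizeFoamResponses(const std::vector<float>& temp)
{
   std::vector<float> result;
   result.reserve(temp.size());
   for (std::size_t i = 0; i < temp.size(); ++i) {
      float norm = 0.f;
      for (std::size_t j = 0; j < temp.size(); ++j)
         if (j != i)
            norm += temp[j] / temp[i];      // relative response of the other classes
      result.push_back(1.f / (1.f + norm)); // == temp[i]/sum(temp) for temp[i] > 0
   }
   return result;
}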
// CreateRanking(): rank the input variables by the number of cuts made in
// each foam dimension.
   fRanking = new Ranking(GetName(), "Variable Importance");
   std::vector<Float_t> importance(GetNvar(), 0);
   // ...
   std::vector<UInt_t> nCuts(fFoam.at(ifoam)->GetTotDim(), 0);
// SetXminXmax(): pass the range [fXmin, fXmax] for every dimension on to the
// given foam.
   // ...
   Log() << kFATAL << "Null pointer given!" << Endl;
   // ...
   if (fMultiTargetRegression) {
      // ... targets are included as extra dimensions
   }
   // ...
   Log() << kDEBUG << "foam: SetXmin[dim=" << idim << "]: " << fXmin.at(idim) << Endl;
   Log() << kDEBUG << "foam: SetXmax[dim=" << idim << "]: " << fXmax.at(idim) << Endl;
// InitFoam(): create a new PDEFoam of type 'ft', set the foam options and
// initialize it.
   if (ft == kMultiTarget)
      // targets and variables together span the foam
      dim = Data()->GetNTargets() + Data()->GetNVariables();
   // ...
   // size of the sampling box in each dimension
   std::vector<Double_t> box;
   // ...
   box.push_back((fXmax.at(idim) - fXmin.at(idim)) * fVolFrac);
   // ...
   if (fDTSeparation == kFoam) {
      // plain PDE-Foam build-up
      // ...
   }
   // ...
   Log() << kFATAL << "Unknown PDEFoam type!" << Endl;
   // ...
   switch (fDTSeparation) {
   // ...
   case kMisClassificationError:
      // ...
   case kGiniIndexWithLaplace:
      // ...
   case kSdivSqrtSplusB:
      // ...
   default:
      Log() << kFATAL << "Separation type " << fDTSeparation
            << " currently not supported" << Endl;
   }
   // ...
   Log() << kFATAL << "Decision tree cell split algorithm is only"
         << " available for (multi) classification with a single"
         << " PDE-Foam (SigBgSeparate=F)" << Endl;
   // ...
   else Log() << kFATAL << "PDEFoam pointer not set, exiting.." << Endl;
   // ...
   fKernelEstimator = CreatePDEFoamKernel();
   // ...
   // pass the verbosity level and build-up parameters on to the foam
   pdefoam->Log().SetMinType(this->Log().GetMinType());
   // ...
   pdefoam->SetEvPerBin(fEvPerBin);
   // ...
   pdefoam->SetMaxDepth(fMaxDepth);
// GetRegressionValues(): regression response for both multi- and mono-target
// regression.
   if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();
   fRegressionReturnVal->reserve(Data()->GetNTargets());
   // ...
   std::vector<Float_t> vals = ev->GetValues();
   // ...
   if (vals.empty())
      Log() << kWARNING << "<GetRegressionValues> value vector is empty. " << Endl;
   // ...
   if (fMultiTargetRegression) {
      // find the cell using a map from dimension index to coordinate
      std::map<Int_t, Float_t> xvec;
      for (UInt_t i=0; i<vals.size(); ++i)
         xvec.insert(std::pair<Int_t, Float_t>(i, vals.at(i)));
      // ...
      std::vector<Float_t> targets = fFoam.at(0)->GetCellValue( xvec, kValue );
      // ...
      if (targets.size() != Data()->GetNTargets())
         Log() << kFATAL << "Something wrong with multi-target regression foam: "
               << "number of targets does not match the DataSet()" << Endl;
      // ...
      fRegressionReturnVal->push_back(targets.at(i));
   }
   else
      fRegressionReturnVal->push_back(fFoam.at(0)->GetCellValue(vals, kValue, fKernelEstimator));

   // apply the inverse transformation to the targets
   // ...
   evT->SetTarget(itgt, fRegressionReturnVal->at(itgt));
   // ...
   const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
   fRegressionReturnVal->clear();
   // ...
   fRegressionReturnVal->push_back(evT2->GetTarget(itgt));
   // ...
   return (*fRegressionReturnVal);
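A hedged usage sketch showing how the regression response computed above is
typically consumed through TMVA::Reader. The weight-file path and the variable
names are illustrative assumptions; Reader::BookMVA and
Reader::EvaluateRegression are the standard TMVA API:

#include <iostream>
#include <vector>
#include "TMVA/Reader.h"

void EvaluatePDEFoamRegression()
{
   TMVA::Reader reader("!Color:!Silent");
   Float_t var1 = 0.f, var2 = 0.f;
   reader.AddVariable("var1", &var1);   // must match the training variables
   reader.AddVariable("var2", &var2);
   reader.BookMVA("PDEFoam", "dataset/weights/TMVARegression_PDEFoam.weights.xml"); // assumed path
   var1 = 0.3f; var2 = -1.2f;
   // one entry per target; the inverse target transformation has already been
   // applied inside GetRegressionValues()
   const std::vector<Float_t>& targets = reader.EvaluateRegression("PDEFoam");
   std::cout << "target[0] = " << targets.at(0) << std::endl;
}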
// CreatePDEFoamKernel(): create a kernel estimator matching the current value
// of fKernel.
   // ...
   Log() << kFATAL << "Kernel: " << fKernel << " not supported!" << Endl;
// DeleteFoams(): delete all trained foams.
   for (UInt_t i=0; i<fFoam.size(); i++)
      if (fFoam.at(i)) delete fFoam.at(i);
   // ...

// Reset(): reset the method, deleting the foams and the kernel estimator.
   if (fKernelEstimator != NULL) {
      delete fKernelEstimator;
      fKernelEstimator = NULL;
   }
// AddWeightsXMLTo(): create the XML output of the PDEFoam method variables.
   gTools().AddAttr( wght, "TargetSelection", TargetSelectionToUInt(fTargetSelection) );
   // ...
   for (UInt_t i=0; i<fXmin.size(); i++) {
      // ...
   }
   for (UInt_t i=0; i<fXmax.size(); i++) {
      // ...
   }

// WriteFoamsToFile(): write the trained foams to a separate ROOT file whose
// name is derived from the weight file name.
   FillVariableNamesToFoam();
   // ...
   rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );
   // ...
   rfname.ReplaceAll( ".xml", "_foams.root" );
   // ...
   for (UInt_t i=0; i<fFoam.size(); ++i) {
      Log() << "writing foam " << fFoam.at(i)->GetFoamName().Data()
            << " to file" << Endl;
      fFoam.at(i)->Write(fFoam.at(i)->GetFoamName().Data());
   }
   // ...
   Log() << kINFO << "Foams written to file: "
         << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
// ReadWeightsFromStream(): read options and internal parameters (old text
// format).
   istr >> fSigBgSeparated;
   // ...
   istr >> fDiscrErrCut;
   // ...
   fKernel = UIntToKernel(ker);
   // ...
   fTargetSelection = UIntToTargetSelection(ts);
   // ...
   istr >> fFillFoamWithOrigWeights;
   istr >> fUseYesNoCell;
   // ...
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets();
   fXmin.assign(kDim, 0);
   fXmax.assign(kDim, 0);
   // ...
   istr >> fXmin.at(i);
   // ...
   istr >> fXmax.at(i);
   // ...
   ReadFoamsFromFile();
// ReadWeightsFromXML(): read the PDEFoam variables from the XML weight file.
   fKernel = UIntToKernel(ker);
   // ...
   fTargetSelection = UIntToTargetSelection(ts);
   // ...
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets();
   fXmin.assign(kDim, 0);
   fXmax.assign(kDim, 0);
   // ...
   for (UInt_t counter=0; counter<kDim; counter++) {
      // ...
      Log() << kFATAL << "dimension index out of range:" << i << Endl;
      // ...
   }
   // ...
   for (UInt_t counter=0; counter<kDim; counter++) {
      // ...
      Log() << kFATAL << "dimension index out of range:" << i << Endl;
      // ...
   }
   // ...
   ReadFoamsFromFile();
   // ...
   if (fKernelEstimator != NULL)
      delete fKernelEstimator;
   fKernelEstimator = CreatePDEFoamKernel();
// ReadClonedFoamFromFile(): read a foam named 'foamname' from the file and
// return a clone of it.
   Log() << kWARNING << "<ReadClonedFoamFromFile>: NULL pointer given" << Endl;
   // ...
   Log() << kWARNING << "<ReadClonedFoamFromFile>: " << foamname
         << " could not be cloned!" << Endl;
// ReadFoamsFromFile(): read the foams back from the "_foams.root" file that
// accompanies the weight file.
   rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );
   // ...
   rfname.ReplaceAll( ".xml", "_foams.root" );

   Log() << kINFO << "Read foams from file: " << gTools().Color("lightblue")
         << rfname << gTools().Color("reset") << Endl;
   // ...
   if (rootFile->IsZombie()) Log() << kFATAL << "Cannot open file \"" << rfname << "\"" << Endl;
   // ...
   if (DoRegression()) {
      if (fMultiTargetRegression)
         fFoam.push_back(ReadClonedFoamFromFile(rootFile, "MultiTargetRegressionFoam"));
      else
         fFoam.push_back(ReadClonedFoamFromFile(rootFile, "MonoTargetRegressionFoam"));
   }
   else {
      if (fSigBgSeparated) {
         fFoam.push_back(ReadClonedFoamFromFile(rootFile, "SignalFoam"));
         fFoam.push_back(ReadClonedFoamFromFile(rootFile, "BgFoam"));
      }
      // ...
      fFoam.push_back(foam);
   }
   // ...
   for (UInt_t i=0; i<fFoam.size(); ++i) {
      if (!fFoam.at(i))
         Log() << kFATAL << "Could not load foam!" << Endl;
   }
// UIntToKernel(): convert UInt_t to EKernel (used when reading weight files).
   switch (iker) {
   case 0: return kNone;
   case 1: return kGaus;
   case 2: return kLinN;
   default:
      Log() << kWARNING << "<UIntToKernel>: unknown kernel number: " << iker << Endl;
      // ...
   }

// UIntToTargetSelection(): convert UInt_t to ETargetSelection.
   switch (its) {
   case 0: return kMean;
   case 1: return kMpv;
   default:
      Log() << kWARNING << "<UIntToTargetSelection>: unknown method TargetSelection: " << its << Endl;
      // ...
   }
// FillVariableNamesToFoam(): store the variable (and target) names in all
// foams.
   if (fMultiTargetRegression && (UInt_t)idim >= DataInfo().GetNVariables())
      fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
   else
      fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
// GetHelpMessage(): provide the help message for the PDEFoam method.
   Log() << "PDE-Foam is a variation of the PDE-RS method using a self-adapting" << Endl;
   Log() << "binning method to divide the multi-dimensional variable space into a" << Endl;
   Log() << "finite number of hyper-rectangles (cells). The binning algorithm" << Endl;
   Log() << "adjusts the size and position of a predefined number of cells such" << Endl;
   Log() << "that the variance of the signal and background densities inside the" << Endl;
   Log() << "cells reaches a minimum." << Endl;

   Log() << "The PDEFoam classifier supports two different algorithms:" << Endl;
   Log() << "  (1) Create one foam, which stores the signal over background" << Endl;
   Log() << "      probability density. During foam buildup the variance of the" << Endl;
   Log() << "      discriminant inside the cells is minimised." << Endl;
   Log() << "      Booking option:   SigBgSeparate=F" << Endl;
   Log() << "  (2) Create two separate foams, one for the signal events and one for" << Endl;
   Log() << "      background events. During foam buildup the variance of the" << Endl;
   Log() << "      event density inside the cells is minimised separately for" << Endl;
   Log() << "      signal and background." << Endl;
   Log() << "      Booking option:   SigBgSeparate=T" << Endl;

   Log() << "The following options can be set (the listed values are found to be a" << Endl;
   Log() << "good starting point for most applications):" << Endl;
   Log() << "  SigBgSeparate          False   Separate Signal and Background" << Endl;
   Log() << "  TailCut                0.001   Fraction of outlier events that are excluded" << Endl;
   Log() << "                                 from the foam in each dimension" << Endl;
   Log() << "  VolFrac                0.0666  Volume fraction (used for density calculation" << Endl;
   Log() << "                                 during foam build-up)" << Endl;
   Log() << "  nActiveCells           500     Maximal number of active cells in final foam" << Endl;
   Log() << "  nSampl                 2000    Number of MC events per cell in foam build-up" << Endl;
   Log() << "  nBin                   5       Number of bins used in foam build-up" << Endl;
   Log() << "  Nmin                   100     Number of events in cell required to split cell" << Endl;
   Log() << "  Kernel                 None    Kernel type used (possible values are: None," << Endl;
   Log() << "                                 Gauss, LinNeighbors)" << Endl;
   Log() << "  Compress               True    Compress foam output file" << Endl;

   Log() << "  Additional regression options:" << Endl;
   Log() << "  MultiTargetRegression  False   Do regression with multiple targets" << Endl;
   Log() << "  TargetSelection        Mean    Target selection method (possible values are:" << Endl;
   Log() << "                                 Mean, Mpv)" << Endl;

   Log() << "The performance of the two implementations was found to be similar for" << Endl;
   Log() << "most examples studied. For the same number of cells per foam, the two-" << Endl;
   Log() << "foam option approximately doubles the amount of computer memory needed" << Endl;
   Log() << "during classification. For special cases where the event-density" << Endl;
   Log() << "distribution of signal and background events is very different, the" << Endl;
   Log() << "two-foam option was found to perform significantly better than the" << Endl;
   Log() << "option with only one foam." << Endl;

   Log() << "In order to gain better classification performance we recommend setting" << Endl;
   Log() << "the parameter \"nActiveCells\" to a high value." << Endl;

   Log() << "The parameter \"VolFrac\" specifies the size of the sampling volume" << Endl;
   Log() << "during foam buildup and should be tuned in order to achieve optimal" << Endl;
   Log() << "performance. A larger box leads to a reduced statistical uncertainty" << Endl;
   Log() << "for small training samples and to smoother sampling. A smaller box on" << Endl;
   Log() << "the other hand increases the sensitivity to statistical fluctuations" << Endl;
   Log() << "in the training samples, but for sufficiently large training samples" << Endl;
   Log() << "it will result in a more precise local estimate of the sampled" << Endl;
   Log() << "density. In general, higher dimensional problems require larger box" << Endl;
   Log() << "sizes, due to the reduced average number of events per box volume. The" << Endl;
   Log() << "default value of 0.0666 was optimised for an example with 5" << Endl;
   Log() << "observables and training samples of the order of 50000 signal and" << Endl;
   Log() << "background events each." << Endl;

   Log() << "Furthermore kernel weighting can be activated, which will lead to an" << Endl;
   Log() << "additional performance improvement. Note that Gauss weighting will" << Endl;
   Log() << "significantly increase the response time of the method. LinNeighbors" << Endl;
   Log() << "weighting performs a linear interpolation with direct neighbor cells" << Endl;
   Log() << "for each dimension and is much faster than Gauss weighting." << Endl;

   Log() << "The classification results were found to be rather insensitive to the" << Endl;
   Log() << "values of the parameters \"nSampl\" and \"nBin\"." << Endl;
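To connect the help text to practice, hedged examples of booking strings for
the two algorithm variants described above (the method labels are arbitrary;
factory and loader are assumed to be set up as in the booking sketch after
ProcessOptions()):

   // (1) one unified foam storing the discriminant, no kernel
   factory.BookMethod(&loader, TMVA::Types::kPDEFoam, "PDEFoam_Unified",
                      "SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:Kernel=None");
   // (2) two separate event-density foams, linear-neighbour kernel
   factory.BookMethod(&loader, TMVA::Types::kPDEFoam, "PDEFoam_TwoFoams",
                      "SigBgSeparate=T:nActiveCells=500:Kernel=LinNeighbors");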