//_______________________________________________________________________
TMVA::MethodPDEFoam::MethodPDEFoam( const TString& jobName,
                                    const TString& methodTitle,
                                    DataSetInfo& dsi,
                                    const TString& theOption,
                                    TDirectory* theTargetDir ) :
   MethodBase( jobName, Types::kPDEFoam, methodTitle, dsi, theOption, theTargetDir )
   , fMultiTargetRegression(kFALSE)
   , fKernelEstimator(NULL)
   , fTargetSelectionStr("Mean")
   , fTargetSelection(kMean)
   , fFillFoamWithOrigWeights(kFALSE)
   , fUseYesNoCell(kFALSE)
   , fDTSeparation(kFoam)
   // ... (remaining members are set in Init())
{
   // init PDEFoam objects
}
//_______________________________________________________________________
TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi,
                                    const TString& theWeightFile,
                                    TDirectory* theTargetDir ) :
   MethodBase( Types::kPDEFoam, dsi, theWeightFile, theTargetDir )
   , fMultiTargetRegression(kFALSE)
   , fKernelEstimator(NULL)
   , fTargetSelectionStr("Mean")
   , fTargetSelection(kMean)
   , fFillFoamWithOrigWeights(kFALSE)
   , fDTSeparation(kFoam)
{
   // constructor used when instantiating the method from a weight file
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::Init( void )
{
   // default initialization called by all constructors

   // ...
   fnCells                  = fnActiveCells*2-1; // a binary cell tree with n active (leaf) cells has 2n-1 cells in total
   // ...
   fFillFoamWithOrigWeights = kFALSE;
   // ...
   fDTSeparation            = kFoam;
   // ...
   fKernelEstimator         = NULL;
   fTargetSelection         = kMean;
   // ...
   fMultiTargetRegression   = kFALSE;

   // the signal reference cut depends on the response convention:
   // with UseYesNoCell=T the response is -1/+1, otherwise the
   // discriminant lies in [0,1]
   if (fUseYesNoCell)
      SetSignalReferenceCut( 0.0 );
   else
      SetSignalReferenceCut( 0.5 );
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::DeclareOptions()
{
   // Declare MethodPDEFoam options.
   DeclareOptionRef( fSigBgSeparated = kFALSE, "SigBgSeparate", "Separate foams for signal and background" );
   DeclareOptionRef( fFrac = 0.001, "TailCut", "Fraction of outlier events that are excluded from the foam in each dimension" );
   DeclareOptionRef( fVolFrac = 1./15., "VolFrac", "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
   DeclareOptionRef( fnActiveCells = 500, "nActiveCells", "Maximum number of active cells to be created by the foam");
   DeclareOptionRef( fnSampl = 2000, "nSampl", "Number of generated MC events per cell");
   DeclareOptionRef( fnBin = 5, "nBin", "Number of bins in edge histograms");
   DeclareOptionRef( fCompress = kTRUE, "Compress", "Compress foam output file");
   DeclareOptionRef( fMultiTargetRegression = kFALSE, "MultiTargetRegression", "Do regression with multiple targets");
   DeclareOptionRef( fNmin = 100, "Nmin", "Number of events in cell required to split cell");
   DeclareOptionRef( fMaxDepth = 0, "MaxDepth", "Maximum depth of cell tree (0=unlimited)");
   DeclareOptionRef( fFillFoamWithOrigWeights = kFALSE, "FillFoamWithOrigWeights", "Fill foam with original or boost weights");
   DeclareOptionRef( fUseYesNoCell = kFALSE, "UseYesNoCell", "Return -1 or 1 for bkg or signal like events");
   DeclareOptionRef( fDTLogic = "None", "DTLogic", "Use decision tree algorithm to split cells");
   AddPreDefVal(TString("None"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("CrossEntropy"));
   AddPreDefVal(TString("GiniIndexWithLaplace"));
   AddPreDefVal(TString("SdivSqrtSplusB"));

   DeclareOptionRef( fKernelStr = "None", "Kernel", "Kernel type used");
   AddPreDefVal(TString("None"));
   AddPreDefVal(TString("Gauss"));
   AddPreDefVal(TString("LinNeighbors"));

   DeclareOptionRef( fTargetSelectionStr = "Mean", "TargetSelection", "Target selection method");
   AddPreDefVal(TString("Mean"));
   AddPreDefVal(TString("Mpv"));
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::DeclareCompatibilityOptions()
{
   // options that are used ONLY for the READER, to ensure backward
   // compatibility with old weight files
   MethodBase::DeclareCompatibilityOptions();
   DeclareOptionRef(fCutNmin = kTRUE, "CutNmin", "Requirement for minimal number of events in cell");
   DeclareOptionRef(fPeekMax = kTRUE, "PeekMax", "Peek cell with max. loss for the next split");
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::ProcessOptions()
{
   // process user options

   if (!(fFrac>=0. && fFrac<=1.)) {
      Log() << kWARNING << "TailCut not in [0.,1.] ==> using 0.001 instead" << Endl;
      fFrac = 0.001;
   }

   if (fnActiveCells < 1) {
      Log() << kWARNING << "invalid number of active cells specified: "
            << fnActiveCells << "; setting nActiveCells=2" << Endl;
      fnActiveCells = 2;
   }
   fnCells = fnActiveCells*2-1;

   // DT logic is only applicable if a single foam is trained
   if (fSigBgSeparated && fDTLogic != "None") {
      Log() << kFATAL << "Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl;
   }

   // translate the DTLogic option string into the separation type
   if (fDTLogic == "None")
      fDTSeparation = kFoam;
   else if (fDTLogic == "GiniIndex")
      fDTSeparation = kGiniIndex;
   else if (fDTLogic == "MisClassificationError")
      fDTSeparation = kMisClassificationError;
   else if (fDTLogic == "CrossEntropy")
      fDTSeparation = kCrossEntropy;
   else if (fDTLogic == "GiniIndexWithLaplace")
      fDTSeparation = kGiniIndexWithLaplace;
   else if (fDTLogic == "SdivSqrtSplusB")
      fDTSeparation = kSdivSqrtSplusB;
   else {
      Log() << kWARNING << "Unknown separation type: " << fDTLogic
            << ", setting to None" << Endl;
      fDTSeparation = kFoam;
   }

   if      (fKernelStr == "None"        ) fKernel = kNone;
   else if (fKernelStr == "Gauss"       ) fKernel = kGaus;
   else if (fKernelStr == "LinNeighbors") fKernel = kLinN;

   if (fTargetSelectionStr == "Mean") fTargetSelection = kMean;
   else                               fTargetSelection = kMpv;

   // sanity check: multiple regression targets require MultiTargetRegression=T
   if (DoRegression() && Data()->GetNTargets() > 1 && !fMultiTargetRegression) {
      Log() << kWARNING << "Warning: number of targets > 1"
            << " and MultiTargetRegression=F was set, this makes no sense!"
            << " --> I'm setting MultiTargetRegression=T" << Endl;
      fMultiTargetRegression = kTRUE;
   }
}
//_______________________________________________________________________
TMVA::MethodPDEFoam::~MethodPDEFoam( void )
{
   // destructor
   DeleteFoams();

   if (fKernelEstimator != NULL)
      delete fKernelEstimator;
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::CalcXminXmax()
{
   // Determine foam range [fXmin, fXmax] for all dimensions, such that
   // a fraction of 'fFrac' events lie outside the foam.

   fXmin.clear();
   fXmax.clear();
   UInt_t kDim = GetNvar();
   UInt_t vDim = Data()->GetNVariables();
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets(); // regression targets are extra foam dimensions

   Float_t *xmin = new Float_t[kDim];
   Float_t *xmax = new Float_t[kDim];
   for (UInt_t dim=0; dim<kDim; dim++) {
      xmin[dim] =  FLT_MAX;
      xmax[dim] = -FLT_MAX;
   }

   Log() << kDEBUG << "Number of training events: " << Data()->GetNTrainingEvents() << Endl;
   Int_t nevoutside = (Int_t)((Data()->GetNTrainingEvents())*(fFrac)); // number of events to lie outside the range
   Int_t rangehistbins = 10000;                                        // number of bins in the range histograms

   // loop over all training events and calculate the minimal and
   // maximal value of every variable (and target)
   for (Long64_t i=0; i<(GetNEvents()); i++) {
      const Event* ev = GetEvent(i);
      for (UInt_t dim=0; dim<kDim; dim++) {
         Float_t val;
         if (fMultiTargetRegression) {
            if (dim < vDim)
               val = ev->GetValue(dim);
            else
               val = ev->GetTarget(dim - vDim);
         }
         else
            val = ev->GetValue(dim);

         if (val < xmin[dim]) xmin[dim] = val;
         if (val > xmax[dim]) xmax[dim] = val;
      }
   }

   // create one histogram per dimension, used to cut off the TailCut
   // fraction of events at both ends of the range
   TH1F **range_h = new TH1F*[kDim];
   for (UInt_t dim=0; dim<kDim; dim++) {
      range_h[dim] = new TH1F(Form("range%i", dim), "range", rangehistbins, xmin[dim], xmax[dim]);
   }

   // fill all variables into the histograms
   for (Long64_t i=0; i<GetNEvents(); i++) {
      const Event* ev = GetEvent(i);
      for (UInt_t dim=0; dim<kDim; dim++) {
         if (fMultiTargetRegression) {
            if (dim < vDim)
               range_h[dim]->Fill(ev->GetValue(dim));
            else
               range_h[dim]->Fill(ev->GetTarget(dim - vDim));
         }
         else
            range_h[dim]->Fill(ev->GetValue(dim));
      }
   }

   // calculate Xmin and Xmax from the histograms: move each bound
   // inwards until more than 'nevoutside' events lie outside
   for (UInt_t dim=0; dim<kDim; dim++) {
      for (Int_t i=1; i<(rangehistbins+1); i++) { // lower bound
         if (range_h[dim]->Integral(0, i) > nevoutside) {
            xmin[dim] = range_h[dim]->GetBinLowEdge(i);
            break;
         }
      }
      for (Int_t i=rangehistbins; i>0; i--) { // upper bound
         if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
            xmax[dim] = range_h[dim]->GetBinLowEdge(i+1);
            break;
         }
      }
   }

   // store the results in the class members
   for (UInt_t dim=0; dim<kDim; dim++) {
      fXmin.push_back(xmin[dim]);
      fXmax.push_back(xmax[dim]);
   }

   delete[] xmin;
   delete[] xmax;

   // delete the range histograms
   for (UInt_t dim=0; dim<kDim; dim++)
      delete range_h[dim];
   delete[] range_h;
}
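// ---------------------------------------------------------------------
// Illustration of the TailCut logic used in CalcXminXmax() above
// (hypothetical standalone helper, assuming a filled range histogram
// 'h' with 'nbins' bins): walk the cumulative distribution inwards
// until more than 'nevoutside' events lie below the candidate bound.
static Double_t FindLowerFoamBound(TH1F* h, Int_t nbins, Int_t nevoutside)
{
   for (Int_t i = 1; i < nbins + 1; i++) {
      if (h->Integral(0, i) > nevoutside)  // integral includes the underflow bin
         return h->GetBinLowEdge(i);       // first bound with > nevoutside events below it
   }
   return h->GetXaxis()->GetXmin();        // fallback: no tail to cut
}
// ---------------------------------------------------------------------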
//_______________________________________________________________________
void TMVA::MethodPDEFoam::Train( void )
{
   // Train PDE-Foam depending on the set options.

   Log() << kVERBOSE << "Calculate Xmin and Xmax for every dimension" << Endl;
   CalcXminXmax();

   // start training
   if (DoRegression()) {
      if (fMultiTargetRegression)
         TrainMultiTargetRegression();
      else
         TrainMonoTargetRegression();
   }
   else {
      if (DoMulticlass())
         TrainMultiClassification();
      else {
         if (DataInfo().GetNormalization() != "EQUALNUMEVENTS") {
            Log() << kINFO << "NormMode=" << DataInfo().GetNormalization()
                  << " chosen. Note that only NormMode=EqualNumEvents"
                  << " ensures that Discriminant values correspond to"
                  << " signal probabilities." << Endl;
         }

         Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl;
         Log() << kDEBUG << "N_bg for training events:  " << Data()->GetNEvtBkgdTrain() << Endl;
         Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl;

         if (fSigBgSeparated)
            TrainSeparatedClassification();
         else
            TrainUnifiedClassification();
      }
   }

   // delete the binary search trees to save memory
   for(UInt_t i=0; i<fFoam.size(); i++) {
      fFoam.at(i)->DeleteBinarySearchTree();
   }
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::TrainSeparatedClassification()
{
   // Creation of 2 separated foams: one for signal events, one for
   // background events.

   TString foamcaption[2];
   foamcaption[0] = "SignalFoam";
   foamcaption[1] = "BgFoam";

   for(int i=0; i<2; i++) {
      // create 2 PDEFoams
      fFoam.push_back( InitFoam(foamcaption[i], kSeparate) );

      Log() << kVERBOSE << "Filling binary search tree of " << foamcaption[i]
            << " with events" << Endl;
      // insert the events into the binary search tree
      for (Long64_t k=0; k<GetNEvents(); ++k) {
         const Event* ev = GetEvent(k);
         if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
            if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
               fFoam.back()->FillBinarySearchTree(ev);
      }

      Log() << kINFO << "Build up " << foamcaption[i] << Endl;
      fFoam.back()->Create(); // build foam

      // loop over all events and fill the foam cells
      for (Long64_t k=0; k<GetNEvents(); ++k) {
         const Event* ev = GetEvent(k);
         Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
         if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
            if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
               fFoam.back()->FillFoamCells(ev, weight);
      }
   }
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::TrainUnifiedClassification()
{
   // Create only one unified foam (fFoam[0]), whose cells contain the
   // average discriminator N_sig/(N_sig + N_bg).

   fFoam.push_back( InitFoam("DiscrFoam", kDiscr, fSignalClass) );

   Log() << kVERBOSE << "Filling binary search tree of discriminator foam with events" << Endl;
   // insert the events into the binary search tree
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      const Event* ev = GetEvent(k);
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillBinarySearchTree(ev);
   }

   Log() << kINFO << "Build up discriminator foam" << Endl;
   fFoam.back()->Create(); // build foam

   // loop over all training events and fill the foam cells with signal
   // and background events
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      const Event* ev = GetEvent(k);
      Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillFoamCells(ev, weight);
   }

   // compute the discriminator (and its error) for each cell
   fFoam.back()->Finalize();
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::TrainMultiClassification()
{
   // Create one unified foam (see TrainUnifiedClassification()) for
   // each class, where the cells of foam i contain the average fraction
   // of events of class i.

   for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
      fFoam.push_back( InitFoam(Form("MultiClassFoam%u",iClass), kMultiClass, iClass) );

      Log() << kVERBOSE << "Filling binary search tree of multiclass foam "
            << iClass << " with events" << Endl;
      // insert the events into the binary search tree
      for (Long64_t k=0; k<GetNEvents(); ++k) {
         const Event* ev = GetEvent(k);
         if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
            fFoam.back()->FillBinarySearchTree(ev);
      }

      Log() << kINFO << "Build up multiclass foam " << iClass << Endl;
      fFoam.back()->Create(); // build foam

      // loop over all training events and fill the foam cells
      for (Long64_t k=0; k<GetNEvents(); ++k) {
         const Event* ev = GetEvent(k);
         Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
         if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
            fFoam.back()->FillFoamCells(ev, weight);
      }

      // compute the cell values
      fFoam.back()->Finalize();
   }
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::TrainMonoTargetRegression()
{
   // Training one (mono-target regression) foam, whose cells contain
   // the average 0th target.

   if (Data()->GetNTargets() != 1) {
      Log() << kFATAL << "Can't do mono-target regression with "
            << Data()->GetNTargets() << " targets!" << Endl;
   }

   Log() << kDEBUG << "MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl;

   fFoam.push_back( InitFoam("MonoTargetRegressionFoam", kMonoTarget) );

   // insert the events into the binary search tree
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      const Event* ev = GetEvent(k);
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillBinarySearchTree(ev);
   }

   Log() << kINFO << "Build mono target regression foam" << Endl;
   fFoam.back()->Create(); // build foam

   // loop over all events and fill the foam cells
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      const Event* ev = GetEvent(k);
      Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillFoamCells(ev, weight);
   }

   // compute the cell values (average target and error)
   fFoam.back()->Finalize();
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::TrainMultiTargetRegression()
{
   // Training one (multi-target regression) foam, whose cells contain
   // the average event density.  The targets are stored in the foam as
   // additional variable dimensions.

   if (fKernel == kLinN)
      Log() << kFATAL << "LinNeighbors kernel currently not supported"
            << " for multi target regression" << Endl;

   fFoam.push_back( InitFoam("MultiTargetRegressionFoam", kMultiTarget) );

   Log() << kVERBOSE << "Filling binary search tree of multi target regression foam with events" << Endl;
   // insert the events into the binary search tree
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      // create a copy of the event with the targets appended to the variables
      Event *ev = new Event(*GetEvent(k));
      std::vector<Float_t> targets(ev->GetTargets());
      const UInt_t nVariables = ev->GetValues().size();
      for (UInt_t i = 0; i < targets.size(); ++i)
         ev->SetVal(i+nVariables, targets.at(i));
      ev->GetTargets().clear();
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillBinarySearchTree(ev);
      // the binary search tree copies the event, so it can be deleted here
      delete ev;
   }

   Log() << kINFO << "Build multi target regression foam" << Endl;
   fFoam.back()->Create(); // build foam

   // loop over the training events and fill the foam cells
   for (Long64_t k=0; k<GetNEvents(); ++k) {
      // create a copy of the event with the targets appended to the variables
      Event *ev = new Event(*GetEvent(k));
      std::vector<Float_t> targets = ev->GetTargets();
      const UInt_t nVariables = ev->GetValues().size();
      Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
      for (UInt_t i = 0; i < targets.size(); ++i)
         ev->SetVal(i+nVariables, targets.at(i));
      ev->GetTargets().clear();
      if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
         fFoam.back()->FillFoamCells(ev, weight);
      delete ev;
   }
}
//_______________________________________________________________________
Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   // Return Mva-Value.

   const Event* ev = GetEvent();
   Float_t discr = 0.;

   if (fSigBgSeparated) {
      std::vector<Float_t> xvec = ev->GetValues();

      Double_t density_sig = 0.; // signal event density
      Double_t density_bg  = 0.; // background event density
      density_sig = fFoam.at(0)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
      density_bg  = fFoam.at(1)->GetCellValue(xvec, kValueDensity, fKernelEstimator);

      // calculate the discriminator (normalised!)
      if ( (density_sig+density_bg) > 0 )
         discr = density_sig/(density_sig+density_bg);
      else
         discr = 0.5; // assume 50% signal probability if no events are found
   }
   else { // signal and background are not separated
      // get the discriminator directly from the unified foam
      discr = fFoam.at(0)->GetCellValue(ev->GetValues(), kValue, fKernelEstimator);
   }

   // calculate the error
   if (err || errUpper) {
      const Double_t discr_error = CalculateMVAError();
      if (err != 0) *err = discr_error;
      if (errUpper != 0) *errUpper = discr_error;
   }

   if (fUseYesNoCell)
      return (discr < 0.5 ? -1 : 1);
   else
      return discr;
}
//_______________________________________________________________________
Double_t TMVA::MethodPDEFoam::CalculateMVAError()
{
   // Calculate the error on the Mva value.

   const Event* ev = GetEvent(); // current event
   Double_t mvaError = 0.0;      // the error on the Mva value

   if (fSigBgSeparated) {
      const std::vector<Float_t>& xvec = ev->GetValues();

      const Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kValue, fKernelEstimator);
      const Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kValue, fKernelEstimator);
      const Double_t scaleB = 1.;
      // estimate of the statistical error on the counted events
      const Double_t errorS = neventsS == 0 ? 1.0 : TMath::Sqrt(neventsS);
      const Double_t errorB = neventsB == 0 ? 1.0 : TMath::Sqrt(neventsB);

      if ((neventsS > 1e-10) || (neventsB > 1e-10)) {
         // propagate the estimated errors on the event numbers through
         // the discriminant formula D = S/(S + scaleB*B)
         mvaError = TMath::Sqrt(Sqr(scaleB * neventsB / Sqr(neventsS + scaleB * neventsB) * errorS) +
                                Sqr(scaleB * neventsS / Sqr(neventsS + scaleB * neventsB) * errorB));
      }
   }
   else { // signal and background are not separated
      // get the discriminator error directly from the foam
      mvaError = fFoam.at(0)->GetCellValue(ev->GetValues(), kValueError, fKernelEstimator);
   }

   return mvaError;
}
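// ---------------------------------------------------------------------
// The expression above is plain error propagation on the discriminant
// D = S/(S + scaleB*B) with independent errors errorS and errorB:
//   dD/dS =  scaleB*B/(S + scaleB*B)^2,  dD/dB = -scaleB*S/(S + scaleB*B)^2
//   sigma_D^2 = (dD/dS * errorS)^2 + (dD/dB * errorB)^2
// Minimal standalone check (hypothetical helper; scaleB = 1 and
// Poisson-like errors assumed, as in the code above):
static Double_t PropagatedDiscrError(Double_t nS, Double_t nB)
{
   const Double_t errS = (nS == 0) ? 1.0 : TMath::Sqrt(nS); // error on signal count
   const Double_t errB = (nB == 0) ? 1.0 : TMath::Sqrt(nB); // error on background count
   const Double_t sum2 = (nS + nB) * (nS + nB);
   return TMath::Sqrt( (nB/sum2 * errS)*(nB/sum2 * errS)
                     + (nS/sum2 * errB)*(nS/sum2 * errB) );
}
// ---------------------------------------------------------------------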
//_______________________________________________________________________
const std::vector<Float_t>& TMVA::MethodPDEFoam::GetMulticlassValues()
{
   // Get the multiclass MVA response for the PDEFoam classifier.

   const Event *ev = GetEvent();
   std::vector<Float_t> xvec = ev->GetValues();

   if (fMulticlassReturnVal == NULL)
      fMulticlassReturnVal = new std::vector<Float_t>();
   fMulticlassReturnVal->clear();
   fMulticlassReturnVal->reserve(DataInfo().GetNClasses());

   std::vector<Float_t> temp; // temporary class values
   UInt_t nClasses = DataInfo().GetNClasses();
   temp.reserve(nClasses);
   for (UInt_t iClass = 0; iClass < nClasses; ++iClass) {
      temp.push_back(fFoam.at(iClass)->GetCellValue(xvec, kValue, fKernelEstimator));
   }

   for (UInt_t iClass = 0; iClass < nClasses; ++iClass) {
      Double_t norm = 0.0; // normalization
      for (UInt_t j = 0; j < nClasses; ++j) {
         if (iClass != j)
            norm += exp(temp[j] - temp[iClass]);
      }
      fMulticlassReturnVal->push_back(1.0 / (1.0 + norm));
   }

   return *fMulticlassReturnVal;
}
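// ---------------------------------------------------------------------
// Note on the normalisation above: for foam responses t_j the returned
// value 1/(1 + sum_{j != i} exp(t_j - t_i)) equals exp(t_i)/sum_j exp(t_j),
// i.e. a softmax evaluated in a numerically stable way (only differences
// of responses are exponentiated).  Hypothetical standalone equivalent:
static std::vector<Float_t> SoftmaxSketch(const std::vector<Float_t>& t)
{
   std::vector<Float_t> out;
   out.reserve(t.size());
   for (UInt_t i = 0; i < t.size(); ++i) {
      Double_t norm = 0.0;
      for (UInt_t j = 0; j < t.size(); ++j)
         if (j != i) norm += std::exp(t[j] - t[i]); // subtracting t[i] avoids overflow
      out.push_back(1.0 / (1.0 + norm));
   }
   return out;
}
// ---------------------------------------------------------------------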
//_______________________________________________________________________
const TMVA::Ranking* TMVA::MethodPDEFoam::CreateRanking()
{
   // Compute the ranking of the input variables from the number of
   // cuts made in each PDEFoam dimension.

   fRanking = new Ranking(GetName(), "Variable Importance");
   std::vector<Float_t> importance(GetNvar(), 0);

   // determine the variable importances
   for (UInt_t ifoam = 0; ifoam < fFoam.size(); ++ifoam) {
      // get the number of cuts made in every dimension of this foam
      PDEFoamCell *root_cell = fFoam.at(ifoam)->GetRootCell();
      std::vector<UInt_t> nCuts(fFoam.at(ifoam)->GetTotDim(), 0);
      GetNCuts(root_cell, nCuts);

      // fill the importance vector (ignoring the target dimensions in
      // case of a multi-target regression foam)
      UInt_t sumOfCuts = 0;
      std::vector<Float_t> tmp_importance;
      for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
         sumOfCuts += nCuts.at(ivar);
         tmp_importance.push_back( nCuts.at(ivar) );
      }
      // normalize the importance vector of this foam
      for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
         if (sumOfCuts > 0)
            tmp_importance.at(ivar) /= sumOfCuts;
         else
            tmp_importance.at(ivar) = 0;
      }
      // accumulate the average importance over all foams
      for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
         importance.at(ivar) += tmp_importance.at(ivar) / fFoam.size();
      }
   }

   // fill the ranking vector
   for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
      fRanking->AddRank(Rank(GetInputLabel(ivar), importance.at(ivar)));
   }

   return fRanking;
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::GetNCuts(PDEFoamCell *cell, std::vector<UInt_t> &nCuts)
{
   // Fill in 'nCuts' the number of cuts made in every foam dimension,
   // starting at the root cell 'cell'.

   if (cell == NULL || cell->GetStat() == 1) // cell is active -> no cut below it
      return;

   nCuts.at(cell->GetBest())++; // count the cut in the dimension used to split this cell

   if (cell->GetDau0() != NULL)
      GetNCuts(cell->GetDau0(), nCuts);
   if (cell->GetDau1() != NULL)
      GetNCuts(cell->GetDau1(), nCuts);
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::SetXminXmax( TMVA::PDEFoam *pdefoam )
{
   // Set Xmin, Xmax for every dimension in the given pdefoam object.

   UInt_t num_vars = GetNvar();
   if (fMultiTargetRegression)
      num_vars += Data()->GetNTargets(); // regression targets are extra foam dimensions

   for (UInt_t idim=0; idim<num_vars; idim++) { // set the upper/lower limit per dimension
      Log()<< kDEBUG << "foam: SetXmin[dim="<<idim<<"]: " << fXmin.at(idim) << Endl;
      Log()<< kDEBUG << "foam: SetXmax[dim="<<idim<<"]: " << fXmax.at(idim) << Endl;
      pdefoam->SetXmin(idim, fXmin.at(idim));
      pdefoam->SetXmax(idim, fXmax.at(idim));
   }
}
//_______________________________________________________________________
TMVA::PDEFoam* TMVA::MethodPDEFoam::InitFoam(TString foamcaption, EFoamType ft, UInt_t cls)
{
   // Create a new PDEFoam, set the PDEFoam options (nCells, nBin, Xmin,
   // Xmax, etc.) and initialize the PDEFoam.

   // number of foam dimensions
   Int_t dim = GetNvar();
   if (ft == kMultiTarget)
      // dimension of the foam = number of targets + non-targets
      dim = Data()->GetNTargets() + Data()->GetNVariables();

   // calculate the range-searching box
   std::vector<Double_t> box;
   for (Int_t idim = 0; idim < dim; ++idim) {
      box.push_back((fXmax.at(idim) - fXmin.at(idim)) * fVolFrac);
   }

   // create the PDEFoam and its density estimator
   PDEFoam *pdefoam = NULL;
   PDEFoamDensityBase *dens = NULL;
   if (fDTSeparation == kFoam) {
      // use the standard PDE-Foam algorithm: create a foam and density
      // estimator matching the requested foam type 'ft'
      // ...
   }
   else if (ft == kDiscr || ft == kMultiClass) {
      // use a decision-tree-like cell split algorithm: translate
      // fDTSeparation into a separation criterion
      SeparationBase *sepType = NULL;
      switch (fDTSeparation) {
      case kGiniIndex:
         sepType = new GiniIndex();
         break;
      case kMisClassificationError:
         sepType = new MisClassificationError();
         break;
      case kCrossEntropy:
         sepType = new CrossEntropy();
         break;
      case kGiniIndexWithLaplace:
         sepType = new GiniIndexWithLaplace();
         break;
      case kSdivSqrtSplusB:
         sepType = new SdivSqrtSplusB();
         break;
      default:
         Log() << kFATAL << "Separation type " << fDTSeparation
               << " currently not supported" << Endl;
         break;
      }
      // ... (create the decision-tree foam and density using 'sepType')
   }
   else {
      Log() << kFATAL << "Decision tree cell split algorithm is only"
            << " available for (multi) classification with a single"
            << " PDE-Foam (SigBgSeparate=F)" << Endl;
   }

   if (pdefoam) pdefoam->SetDensity(dens);
   else Log() << kFATAL << "PDEFoam pointer not set, exiting.." << Endl;

   // create the pdefoam kernel
   fKernelEstimator = CreatePDEFoamKernel();

   // set PDEFoam parameters
   pdefoam->SetDim(dim);
   pdefoam->SetnCells(fnCells);
   pdefoam->SetnSampl(fnSampl);
   // ... (further parameters: nBin, EvPerBin, Nmin, MaxDepth)

   // set the foam bounds [fXmin, fXmax]
   SetXminXmax(pdefoam);

   return pdefoam;
}
//_______________________________________________________________________
const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues()
{
   // Return regression values for both multi- and mono-target
   // regression.

   if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();
   fRegressionReturnVal->reserve(Data()->GetNTargets());

   const Event* ev = GetEvent();
   std::vector<Float_t> vals = ev->GetValues(); // array of event variables (non-targets)

   if (vals.empty()) {
      Log() << kWARNING << "<GetRegressionValues> value vector is empty." << Endl;
   }

   if (fMultiTargetRegression) {
      // create a map of the event variables
      std::map<Int_t, Float_t> xvec;
      for (UInt_t i=0; i<vals.size(); ++i)
         xvec.insert(std::pair<Int_t, Float_t>(i, vals.at(i)));
      // get the targets
      std::vector<Float_t> targets = fFoam.at(0)->GetCellValue( xvec, kValue );

      // sanity check
      if (targets.size() != Data()->GetNTargets())
         Log() << kFATAL << "Something wrong with multi-target regression foam: "
               << "number of targets does not match the DataSet()" << Endl;
      for(UInt_t i=0; i<targets.size(); i++)
         fRegressionReturnVal->push_back(targets.at(i));
   }
   else {
      fRegressionReturnVal->push_back(fFoam.at(0)->GetCellValue(vals, kValue, fKernelEstimator));
   }

   // apply the inverse transformation to the regression values
   Event * evT = new Event(*ev);
   for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
      evT->SetTarget(itgt, fRegressionReturnVal->at(itgt) );
   }
   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->clear();
   for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
      fRegressionReturnVal->push_back( evT2->GetTarget(itgt) );
   }

   delete evT;

   return (*fRegressionReturnVal);
}
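// ---------------------------------------------------------------------
// Application sketch (illustrative, assuming the standard TMVA::Reader
// workflow with all input variables registered via AddVariable; the
// weight file path is illustrative only): for mono-target regression
// the first element of the returned vector is the predicted target.
static Float_t EvaluatePDEFoamRegression(TMVA::Reader& reader)
{
   reader.BookMVA("PDEFoam", "weights/TMVARegression_PDEFoam.weights.xml");
   return reader.EvaluateRegression("PDEFoam").at(0);
}
// ---------------------------------------------------------------------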
//_______________________________________________________________________
TMVA::PDEFoamKernelBase* TMVA::MethodPDEFoam::CreatePDEFoamKernel()
{
   // create a pdefoam kernel estimator, depending on the current value
   // of fKernel

   switch (fKernel) {
   case kNone:
      return new PDEFoamKernelTrivial();
   case kLinN:
      return new PDEFoamKernelLinN();
   case kGaus:
      return new PDEFoamKernelGauss(fVolFrac/2.0);
   default:
      Log() << kFATAL << "Kernel: " << fKernel << " not supported!" << Endl;
      return NULL;
   }
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::DeleteFoams()
{
   // Deletes all trained foams.
   for (UInt_t i=0; i<fFoam.size(); i++)
      if (fFoam.at(i)) delete fFoam.at(i);
   fFoam.clear();
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::Reset()
{
   // reset MethodPDEFoam: delete the foams and the kernel estimator
   DeleteFoams();

   if (fKernelEstimator != NULL) {
      delete fKernelEstimator;
      fKernelEstimator = NULL;
   }
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::AddWeightsXMLTo( void* parent ) const
{
   // create XML output of PDEFoam method variables

   void* wght = gTools().AddChild(parent, "Weights");
   gTools().AddAttr( wght, "SigBgSeparated",  fSigBgSeparated );
   // ... (TailCut, VolFrac, nCells, nSampl, and the other scalar options)
   gTools().AddAttr( wght, "TargetSelection", TargetSelectionToUInt(fTargetSelection) );
   gTools().AddAttr( wght, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
   // ...

   // save the foam borders Xmin[i], Xmax[i]
   for (UInt_t i=0; i<fXmin.size(); i++){
      void *xmin_wrap = gTools().AddChild( wght, "Xmin" );
      gTools().AddAttr( xmin_wrap, "Index", i );
      gTools().AddAttr( xmin_wrap, "Value", fXmin.at(i) );
   }
   for (UInt_t i=0; i<fXmax.size(); i++){
      void *xmax_wrap = gTools().AddChild( wght, "Xmax" );
      gTools().AddAttr( xmax_wrap, "Index", i );
      gTools().AddAttr( xmax_wrap, "Value", fXmax.at(i) );
   }

   // write the foams to a separate ROOT file
   WriteFoamsToFile();
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::WriteFoamsToFile() const
{
   // Write PDEFoams to file.

   // store the variable names in all foams
   FillVariableNamesToFoam();

   TString rfname( GetWeightFileName() );

   // ... (replace the weight file extension by "_foams.root")

   TFile *rootFile = 0;
   if (fCompress) rootFile = new TFile(rfname, "RECREATE", "foamfile", 9);
   else           rootFile = new TFile(rfname, "RECREATE");

   // write the foams
   for (UInt_t i=0; i<fFoam.size(); ++i) {
      Log() << "writing foam " << fFoam.at(i)->GetFoamName().Data()
            << " to file" << Endl;
      fFoam.at(i)->Write(fFoam.at(i)->GetFoamName().Data());
   }

   rootFile->Close();
   Log() << kINFO << "Foams written to file: " << rfname << Endl;
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::ReadWeightsFromStream( std::istream& istr )
{
   // read options and internal parameters from a text weight file

   istr >> fSigBgSeparated;   // separated foams
   // ...
   istr >> fDiscrErrCut;      // cut on the discriminant error
   // ...

   Bool_t CutNmin, CutRMSmin; // dummies for backward compatibility
   istr >> CutNmin;
   istr >> CutRMSmin;
   // ...

   UInt_t ker = 0;
   istr >> ker;
   fKernel = UIntToKernel(ker);

   UInt_t ts = 0;
   istr >> ts;
   fTargetSelection = UIntToTargetSelection(ts);

   istr >> fFillFoamWithOrigWeights;
   istr >> fUseYesNoCell;

   // clear the old range and prepare the new range for reading
   fXmin.clear();
   fXmax.clear();
   UInt_t kDim = GetNvar();
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets();
   fXmin.assign(kDim, 0);
   fXmax.assign(kDim, 0);

   // read the range
   for (UInt_t i=0; i<kDim; i++)
      istr >> fXmin.at(i);
   for (UInt_t i=0; i<kDim; i++)
      istr >> fXmax.at(i);

   // read the pure foams from file
   ReadFoamsFromFile();
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::ReadWeightsFromXML( void* wghtnode )
{
   // read PDEFoam variables from the XML weight file

   gTools().ReadAttr( wghtnode, "SigBgSeparated",  fSigBgSeparated );
   // ... (TailCut, VolFrac, nCells, nSampl, and the other scalar options)

   UInt_t ker = 0;
   gTools().ReadAttr( wghtnode, "Kernel", ker );
   fKernel = UIntToKernel(ker);

   UInt_t ts = 0;
   gTools().ReadAttr( wghtnode, "TargetSelection", ts );
   fTargetSelection = UIntToTargetSelection(ts);

   if (gTools().HasAttr(wghtnode, "FillFoamWithOrigWeights"))
      gTools().ReadAttr( wghtnode, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
   if (gTools().HasAttr(wghtnode, "UseYesNoCell"))
      gTools().ReadAttr( wghtnode, "UseYesNoCell", fUseYesNoCell );

   // clear the old range [Xmin, Xmax] and prepare the new range for reading
   fXmin.clear();
   fXmax.clear();
   UInt_t kDim = GetNvar();
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets();
   fXmin.assign(kDim, 0);
   fXmax.assign(kDim, 0);

   // read the foam range
   void *xmin_wrap = gTools().GetChild( wghtnode );
   for (UInt_t counter=0; counter<kDim; counter++) {
      UInt_t i = 0;
      gTools().ReadAttr( xmin_wrap, "Index", i );
      if (i >= kDim)
         Log() << kFATAL << "dimension index out of range:" << i << Endl;
      gTools().ReadAttr( xmin_wrap, "Value", fXmin.at(i) );
      xmin_wrap = gTools().GetNextChild( xmin_wrap );
   }

   void *xmax_wrap = xmin_wrap;
   for (UInt_t counter=0; counter<kDim; counter++) {
      UInt_t i = 0;
      gTools().ReadAttr( xmax_wrap, "Index", i );
      if (i >= kDim)
         Log() << kFATAL << "dimension index out of range:" << i << Endl;
      gTools().ReadAttr( xmax_wrap, "Value", fXmax.at(i) );
      xmax_wrap = gTools().GetNextChild( xmax_wrap );
   }

   // read the pure foams from file
   ReadFoamsFromFile();

   if (fKernelEstimator != NULL)
      delete fKernelEstimator;
   fKernelEstimator = CreatePDEFoamKernel();
}
//_______________________________________________________________________
TMVA::PDEFoam* TMVA::MethodPDEFoam::ReadClonedFoamFromFile( TFile* file, const TString& foamname )
{
   // Reads a foam with name 'foamname' from file, and returns a clone
   // of the foam.  The given ROOT file must be open.

   if (file == NULL) {
      Log() << kWARNING << "<ReadClonedFoamFromFile>: NULL pointer given" << Endl;
      return NULL;
   }

   // load the foam from the file
   PDEFoam *foam = (PDEFoam*) file->Get(foamname);
   if (foam == NULL)
      return NULL;

   // clone the foam, so that it survives closing the file
   foam = (PDEFoam*) foam->Clone();
   if (foam == NULL) {
      Log() << kWARNING << "<ReadClonedFoamFromFile>: " << foamname
            << " could not be cloned!" << Endl;
      return NULL;
   }

   return foam;
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::ReadFoamsFromFile()
{
   // read the foams from the foam file

   TString rfname( GetWeightFileName() );

   // ... (replace the weight file extension by "_foams.root")

   TFile *rootFile = new TFile( rfname, "READ" );

   // read the foams from the file, depending on the analysis type
   if (DoRegression()) {
      if (fMultiTargetRegression)
         fFoam.push_back(ReadClonedFoamFromFile(rootFile, "MultiTargetRegressionFoam"));
      else
         fFoam.push_back(ReadClonedFoamFromFile(rootFile, "MonoTargetRegressionFoam"));
   }
   else {
      if (fSigBgSeparated) {
         fFoam.push_back(ReadClonedFoamFromFile(rootFile, "SignalFoam"));
         fFoam.push_back(ReadClonedFoamFromFile(rootFile, "BgFoam"));
      }
      else {
         // try to load the discriminator foam
         PDEFoam *foam = ReadClonedFoamFromFile(rootFile, "DiscrFoam");
         if (foam != NULL)
            fFoam.push_back(foam);
         else {
            // load the multiclass foams
            for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
               fFoam.push_back(ReadClonedFoamFromFile(rootFile, Form("MultiClassFoam%u",iClass)));
            }
         }
      }
   }

   // close the file
   rootFile->Close();
   delete rootFile;

   // check that all foams have been loaded
   for (UInt_t i=0; i<fFoam.size(); ++i) {
      if (!fFoam.at(i))
         Log() << kFATAL << "Could not load foam!" << Endl;
   }
}
//_______________________________________________________________________
TMVA::MethodPDEFoam::EKernel TMVA::MethodPDEFoam::UIntToKernel( UInt_t iker )
{
   // convert UInt_t to EKernel (used for reading weight files)
   switch(iker) {
   case 0: return kNone;
   case 1: return kGaus;
   case 2: return kLinN;
   default:
      Log() << kWARNING << "<UIntToKernel>: unknown kernel number: " << iker << Endl;
      return kNone;
   }
}

//_______________________________________________________________________
TMVA::MethodPDEFoam::ETargetSelection TMVA::MethodPDEFoam::UIntToTargetSelection( UInt_t its )
{
   // convert UInt_t to ETargetSelection (used for reading weight files)
   switch(its) {
   case 0: return kMean;
   case 1: return kMpv;
   default:
      Log() << kWARNING << "<UIntToTargetSelection>: unknown method TargetSelection: " << its << Endl;
      return kMean;
   }
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::FillVariableNamesToFoam() const
{
   // store the variable names in all foams
   for (UInt_t ifoam=0; ifoam<fFoam.size(); ifoam++) {
      for (Int_t idim=0; idim<fFoam.at(ifoam)->GetTotDim(); idim++) {
         if(fMultiTargetRegression && (UInt_t)idim>=DataInfo().GetNVariables())
            fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
         else
            fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
      }
   }
}
//_______________________________________________________________________
void TMVA::MethodPDEFoam::GetHelpMessage() const
{
   // provide the help message

   Log() << "PDE-Foam is a variation of the PDE-RS method using a self-adapting" << Endl;
   Log() << "binning method to divide the multi-dimensional variable space into a" << Endl;
   Log() << "finite number of hyper-rectangles (cells). The binning algorithm" << Endl;
   Log() << "adjusts the size and position of a predefined number of cells such" << Endl;
   Log() << "that the variance of the signal and background densities inside the" << Endl;
   Log() << "cells reaches a minimum." << Endl;
   Log() << Endl;
   Log() << "The PDEFoam classifier supports two different algorithms:" << Endl;
   Log() << Endl;
   Log() << "  (1) Create one foam, which stores the signal over background" << Endl;
   Log() << "      probability density. During foam buildup the variance of the" << Endl;
   Log() << "      discriminant inside the cells is minimised." << Endl;
   Log() << Endl;
   Log() << "      Booking option:   SigBgSeparated=F" << Endl;
   Log() << Endl;
   Log() << "  (2) Create two separate foams, one for the signal events and one for" << Endl;
   Log() << "      background events. During foam buildup the variance of the" << Endl;
   Log() << "      event density inside the cells is minimised separately for" << Endl;
   Log() << "      signal and background." << Endl;
   Log() << Endl;
   Log() << "      Booking option:   SigBgSeparated=T" << Endl;
   Log() << Endl;
   Log() << "The following options can be set (the listed values are found to be a" << Endl;
   Log() << "good starting point for most applications):" << Endl;
   Log() << Endl;
   Log() << "          SigBgSeparate   False   Separate Signal and Background" << Endl;
   Log() << "                TailCut   0.001   Fraction of outlier events that are excluded" << Endl;
   Log() << "                                  from the foam in each dimension" << Endl;
   Log() << "                VolFrac  0.0666   Volume fraction (used for density calculation" << Endl;
   Log() << "                                  during foam build-up)" << Endl;
   Log() << "           nActiveCells     500   Maximum number of active cells in final foam" << Endl;
   Log() << "                 nSampl    2000   Number of MC events per cell in foam build-up" << Endl;
   Log() << "                   nBin       5   Number of bins used in foam build-up" << Endl;
   Log() << "                   Nmin     100   Number of events in cell required to split cell" << Endl;
   Log() << "                 Kernel    None   Kernel type used (possible values are: None," << Endl;
   Log() << "                                  Gauss, LinNeighbors)" << Endl;
   Log() << "               Compress    True   Compress foam output file" << Endl;
   Log() << Endl;
   Log() << "   Additional regression options:" << Endl;
   Log() << Endl;
   Log() << "  MultiTargetRegression   False   Do regression with multiple targets" << Endl;
   Log() << "        TargetSelection    Mean   Target selection method (possible values are:" << Endl;
   Log() << "                                  Mean, Mpv)" << Endl;
   Log() << Endl;
   Log() << "The performance of the two implementations was found to be similar for" << Endl;
   Log() << "most examples studied. For the same number of cells per foam, the two-" << Endl;
   Log() << "foam option approximately doubles the amount of computer memory needed" << Endl;
   Log() << "during classification. For special cases where the event-density" << Endl;
   Log() << "distribution of signal and background events is very different, the" << Endl;
   Log() << "two-foam option was found to perform significantly better than the" << Endl;
   Log() << "option with only one foam." << Endl;
   Log() << Endl;
   Log() << "In order to gain better classification performance we recommend to set" << Endl;
   Log() << "the parameter \"nActiveCells\" to a high value." << Endl;
   Log() << Endl;
   Log() << "The parameter \"VolFrac\" specifies the size of the sampling volume" << Endl;
   Log() << "during foam buildup and should be tuned in order to achieve optimal" << Endl;
   Log() << "performance. A larger box leads to a reduced statistical uncertainty" << Endl;
   Log() << "for small training samples and to smoother sampling. A smaller box on" << Endl;
   Log() << "the other hand increases the sensitivity to statistical fluctuations" << Endl;
   Log() << "in the training samples, but for sufficiently large training samples" << Endl;
   Log() << "it will result in a more precise local estimate of the sampled" << Endl;
   Log() << "density. In general, higher dimensional problems require larger box" << Endl;
   Log() << "sizes, due to the reduced average number of events per box volume. The" << Endl;
   Log() << "default value of 0.0666 was optimised for an example with 5" << Endl;
   Log() << "observables and training samples of the order of 50000 signal and" << Endl;
   Log() << "background events each." << Endl;
   Log() << Endl;
   Log() << "Furthermore kernel weighting can be activated, which will lead to an" << Endl;
   Log() << "additional performance improvement. Note that Gauss weighting will" << Endl;
   Log() << "significantly increase the response time of the method. LinNeighbors" << Endl;
   Log() << "weighting performs a linear interpolation with direct neighbor cells" << Endl;
   Log() << "for each dimension and is much faster than Gauss weighting." << Endl;
   Log() << Endl;
   Log() << "The classification results were found to be rather insensitive to the" << Endl;
   Log() << "values of the parameters \"nSamples\" and \"nBin\"." << Endl;
}
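// ---------------------------------------------------------------------
// Application sketch for the classifier described in the help text above
// (illustrative, assuming a TMVA::Reader with all input variables
// registered via AddVariable; the weight file path is illustrative
// only).  With UseYesNoCell=F the response is the discriminant D in
// [0,1]; with UseYesNoCell=T it is mapped to -1 (background-like) or
// +1 (signal-like).
static Double_t EvaluatePDEFoamClassifier(TMVA::Reader& reader)
{
   reader.BookMVA("PDEFoam", "weights/TMVAClassification_PDEFoam.weights.xml");
   return reader.EvaluateMVA("PDEFoam");
}
// ---------------------------------------------------------------------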