#include <cassert>
#include <cstdio>
#include <iomanip>
#include <vector>

#include "Riostream.h"
#include "TFile.h"
#include "TMath.h"
#include "TMatrix.h"

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/MethodPDEFoam.h"
#include "TMVA/Ranking.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"
REGISTER_METHOD(PDEFoam)
ClassImp(TMVA::MethodPDEFoam)
// Standard constructor used when booking the method for training:
// forwards all arguments to MethodBase; member defaults are set in Init().
TMVA::MethodPDEFoam::MethodPDEFoam( const TString& jobName,
                                    const TString& methodTitle,
                                    DataSetInfo& dsi,
                                    const TString& theOption,
                                    TDirectory* theTargetDir ) :
   MethodBase( jobName, Types::kPDEFoam, methodTitle, dsi, theOption, theTargetDir )
{
}
// Constructor used when reading the method back from a weight file
// (application phase); forwards to the corresponding MethodBase ctor.
TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi,
                                    const TString& theWeightFile,
                                    TDirectory* theTargetDir ) :
   MethodBase( Types::kPDEFoam, dsi, theWeightFile, theTargetDir )
{
}
// PDEFoam supports two-class classification and (any-target) regression;
// everything else is rejected.
Bool_t TMVA::MethodPDEFoam::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t )
{
   Bool_t supported = kFALSE;
   switch (type) {
   case Types::kClassification:
      supported = (numberClasses == 2);  // only binary classification
      break;
   case Types::kRegression:
      supported = kTRUE;
      break;
   default:
      break;
   }
   return supported;
}
// Default initialization of all tunable parameters, called by the
// constructors before option parsing overrides them.
void TMVA::MethodPDEFoam::Init( void )
{
   fSigBgSeparated = kFALSE;    // default: single unified discriminator foam
   fFrac           = 0.001;     // tail fraction cut away per dimension
   fDiscrErrCut    = -1.;       // < 0 means: no cut on discriminant error
   fVolFrac        = 30;        // inverse sampling-box size (integer)
   fVolFracInv     = 1./30.;    // sampling-box size as fraction of foam volume
   fnActiveCells   = 500;
   fnCells         = fnActiveCells*2-1;  // total cells = 2*active - 1 (binary cell tree)
   fnSampl         = 2000;      // MC events sampled per cell during build-up
   fnBin           = 5;         // bins in edge histograms
   fEvPerBin       = 10000;
   fCutNmin        = true;      // require a minimal event count to split a cell
   fNmin           = 100;
   fCutRMSmin      = false;
   fRMSmin         = 0.01;
   fKernel         = kNone;     // no kernel weighting by default
   fTargetSelection= kMean;
   fCompress       = kTRUE;     // compress the foam ROOT file
   fMultiTargetRegression = kFALSE;
   // no foams exist yet; they are created during training or read from file
   for (int i=0; i<FOAM_NUMBER; i++) foam[i] = NULL;
   SetSignalReferenceCut( 0.0 );
}
// Declare the user-configurable booking options and the allowed values
// for the string-valued ones (Kernel, TargetSelection).
void TMVA::MethodPDEFoam::DeclareOptions()
{
   DeclareOptionRef( fSigBgSeparated = kFALSE, "SigBgSeparate", "Separate foams for signal and background" );
   DeclareOptionRef( fFrac = 0.001, "TailCut", "Fraction of outlier events that are excluded from the foam in each dimension" );
   DeclareOptionRef( fVolFracInv = 1./30., "VolFrac", "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
   DeclareOptionRef( fnActiveCells = 500, "nActiveCells", "Maximum number of active cells to be created by the foam");
   DeclareOptionRef( fnSampl = 2000, "nSampl", "Number of generated MC events per cell");
   DeclareOptionRef( fnBin = 5, "nBin", "Number of bins in edge histograms");
   DeclareOptionRef( fCompress = kTRUE, "Compress", "Compress foam output file");
   DeclareOptionRef( fMultiTargetRegression = kFALSE, "MultiTargetRegression", "Do regression with multiple targets");
   DeclareOptionRef( fCutNmin = true, "CutNmin", "Requirement for minimal number of events in cell");
   DeclareOptionRef( fNmin = 100, "Nmin", "Number of events in cell required to split cell");
   // kernel weighting applied when evaluating the foam
   DeclareOptionRef( fKernelStr = "None", "Kernel", "Kernel type used");
   AddPreDefVal(TString("None"));
   AddPreDefVal(TString("Gauss"));
   AddPreDefVal(TString("LinNeighbors"));
   // how the regression target is extracted from a cell's distribution
   DeclareOptionRef( fTargetSelectionStr = "Mean", "TargetSelection", "Target selection method");
   AddPreDefVal(TString("Mean"));
   AddPreDefVal(TString("Mpv"));
}
// Validate the parsed user options and derive dependent parameters
// (total cell count, integer volume fraction, kernel / target enums).
void TMVA::MethodPDEFoam::ProcessOptions()
{
   if (!(fFrac>0. && fFrac<=1.)) {
      Log() << kWARNING << "TailCut not in [0.,1] ==> using 0.001 instead" << Endl;
      fFrac = 0.001;
   }
   // guard the division below: a VolFrac of <= 0 would divide by zero
   // (or produce a negative box size); fall back to the default 1/30
   if (fVolFracInv <= 0.) {
      Log() << kWARNING << "VolFrac not > 0 ==> using 1./30. instead" << Endl;
      fVolFracInv = 1./30.;
   }
   fnCells = fnActiveCells*2-1;            // binary cell tree: total = 2*active - 1
   fVolFrac = Int_t(1./fVolFracInv + 0.5); // round inverse box size to nearest integer
   if (fCutRMSmin && fRMSmin>1.0) {
      Log() << kWARNING << "RMSmin > 1.0 ==> using 1.0 instead" << Endl;
      fRMSmin = 1.0;
   }
   // Nmin == 0 makes the cut meaningless; disable it
   if (fNmin==0)
      fCutNmin = false;
   // map option strings onto the internal enums
   if      (fKernelStr == "None"        ) fKernel = kNone;
   else if (fKernelStr == "Gauss"       ) fKernel = kGaus;
   else if (fKernelStr == "LinNeighbors") fKernel = kLinN;
   if (fTargetSelectionStr == "Mean" ) fTargetSelection = kMean;
   else                                fTargetSelection = kMpv;
}
// Destructor: delete every foam that was allocated.
// The previous version deleted foam[1] only when fSigBgSeparated was
// still set (and the method was not a regression), which could leak a
// background foam if the flag changed after the foams were created.
// Deleting every non-null slot is always safe.
TMVA::MethodPDEFoam::~MethodPDEFoam( void )
{
   for (int i = 0; i < FOAM_NUMBER; i++) {
      if (foam[i]) {
         delete foam[i];
         foam[i] = 0;
      }
   }
}
void TMVA::MethodPDEFoam::CalcXminXmax()
{
Xmin.clear();
Xmax.clear();
UInt_t kDim = GetNvar();
UInt_t tDim = Data()->GetNTargets();
UInt_t vDim = Data()->GetNVariables();
if (fMultiTargetRegression)
kDim += tDim;
Double_t *xmin = new Double_t[kDim];
Double_t *xmax = new Double_t[kDim];
for (UInt_t dim=0; dim<kDim; dim++) {
xmin[dim] = 1.e100;
xmax[dim] = -1.e100;
}
Log() << kDEBUG << "Number of training events: " << Data()->GetNTrainingEvents() << Endl;
Int_t nevoutside = (Int_t)((Data()->GetNTrainingEvents())*(fFrac));
Int_t rangehistbins = 10000;
for (Long64_t i=0; i<(GetNEvents()); i++) {
const Event* ev = GetEvent(i);
for (UInt_t dim=0; dim<kDim; dim++) {
Double_t val;
if (fMultiTargetRegression) {
if (dim < vDim)
val = ev->GetValue(dim);
else
val = ev->GetTarget(dim-vDim);
}
else
val = ev->GetValue(dim);
if (val<xmin[dim])
xmin[dim] = val;
if (val>xmax[dim])
xmax[dim] = val;
}
}
TH1F **range_h = new TH1F*[kDim];
char text[200];
for (UInt_t dim=0; dim<kDim; dim++) {
sprintf(text, "range%i", dim);
range_h[dim] = new TH1F(text, "range", rangehistbins, xmin[dim], xmax[dim]);
}
for (Long64_t i=0; i<GetNEvents(); i++) {
const Event* ev = GetEvent(i);
for (UInt_t dim=0; dim<kDim; dim++) {
if (fMultiTargetRegression) {
if (dim < vDim)
range_h[dim]->Fill(ev->GetValue(dim));
else
range_h[dim]->Fill(ev->GetTarget(dim-vDim));
}
else
range_h[dim]->Fill(ev->GetValue(dim));
}
}
for (UInt_t dim=0; dim<kDim; dim++) {
for (Int_t i=1; i<(rangehistbins+1); i++) {
if (range_h[dim]->Integral(0, i) > nevoutside) {
xmin[dim]=range_h[dim]->GetBinLowEdge(i);
break;
}
}
for (Int_t i=rangehistbins; i>0; i--) {
if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
xmax[dim]=range_h[dim]->GetBinLowEdge(i+1);
break;
}
}
}
Xmin.clear();
Xmax.clear();
for (UInt_t dim=0; dim<kDim; dim++) {
Xmin.push_back(xmin[dim]);
Xmax.push_back(xmax[dim]);
}
delete[] xmin;
delete[] xmax;
for (UInt_t dim=0; dim<kDim; dim++)
delete range_h[dim];
delete[] range_h;
return;
}
// Main training entry point: compute the foam ranges, then dispatch to
// the appropriate training routine depending on analysis type
// (mono/multi-target regression vs. separated/unified classification).
void TMVA::MethodPDEFoam::Train( void )
{
   Log() << kDEBUG << "Calculate Xmin and Xmax for every dimension" << Endl;
   CalcXminXmax();
   if (DoRegression()) {
      if (fMultiTargetRegression)
         TrainMultiTargetRegression();
      else
         TrainMonoTargetRegression();
   }
   else {
      // discriminant values are signal probabilities only when signal and
      // background are normalized to equal event numbers
      if (DataInfo().GetNormalization() != "EQUALNUMEVENTS" ) {
         Log() << kINFO << "NormMode=" << DataInfo().GetNormalization()
               << " chosen. Note that only NormMode=EqualNumEvents"
               << " ensures that Discriminant values correspond to"
               << " signal probabilities." << Endl;
      }
      Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl;
      Log() << kDEBUG << "N_bg for training events: " << Data()->GetNEvtBkgdTrain() << Endl;
      Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl;
      if (fSigBgSeparated)
         TrainSeparatedClassification();
      else
         TrainUnifiedClassification();
   }
}
void TMVA::MethodPDEFoam::TrainSeparatedClassification()
{
TString foamcaption[2];
foamcaption[0] = "SignalFoam";
foamcaption[1] = "BgFoam";
for(int i=0; i<FOAM_NUMBER; i++) {
foam[i] = new PDEFoam(foamcaption[i]);
InitFoam(foam[i], kSeparate);
Log() << kINFO << "Filling binary search tree of " << foamcaption[i]
<< " with events" << Endl;
for (Long64_t k=0; k<GetNEvents(); k++) {
const Event* ev = GetEvent(k);
if ((i==0 && ev->IsSignal()) || (i==1 && !ev->IsSignal()))
foam[i]->FillBinarySearchTree(ev, IgnoreEventsWithNegWeightsInTraining());
}
Log() << kINFO << "Build " << foamcaption[i] << Endl;
foam[i]->SetNElements(1);
foam[i]->Create(fCutNmin);
foam[i]->SetNElements(2);
foam[i]->ResetCellElements();
Log() << "Filling foam cells with events" << Endl;
for (Long64_t k=0; k<GetNEvents(); k++) {
const Event* ev = GetEvent(k);
if ((i==0 && ev->IsSignal()) || (i==1 && !ev->IsSignal()))
foam[i]->FillFoamCells(ev, IgnoreEventsWithNegWeightsInTraining());
}
Log() << kDEBUG << "Check all cells and remove cells with volume 0" << Endl;
foam[i]->CheckCells(true);
}
}
void TMVA::MethodPDEFoam::TrainUnifiedClassification()
{
foam[0] = new PDEFoam("DiscrFoam");
InitFoam(foam[0], kDiscr);
Log() << kINFO << "Filling binary search tree of discriminator foam with events" << Endl;
for (Long64_t k=0; k<GetNEvents(); k++)
foam[0]->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
Log() << kINFO << "Build up discriminator foam" << Endl;
foam[0]->SetNElements(1);
foam[0]->Create(fCutNmin);
Log() << kDEBUG << "Resetting cell integrals" << Endl;
foam[0]->SetNElements(2);
foam[0]->ResetCellElements();
Log() << "Filling foam cells with events" << Endl;
for (UInt_t k=0; k<GetNEvents(); k++)
foam[0]->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
Log() << "Calculate cell discriminator"<< Endl;
foam[0]->CalcCellDiscr();
Log() << kDEBUG << "Check all cells and remove cells with volume 0" << Endl;
foam[0]->CheckCells(true);
}
void TMVA::MethodPDEFoam::TrainMonoTargetRegression()
{
if (Data()->GetNTargets() < 1) {
Log() << kFATAL << "Error: number of targets = " << Data()->GetNTargets() << Endl;
return;
}
else if (Data()->GetNTargets() > 1) {
Log() << kWARNING << "Warning: number of targets = " << Data()->GetNTargets()
<< " --> using only first target" << Endl;
}
else
Log() << kDEBUG << "MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl;
TString foamcaption = "MonoTargetRegressionFoam";
foam[0] = new PDEFoam(foamcaption);
InitFoam(foam[0], kMonoTarget);
Log() << kINFO << "Filling binary search tree with events" << Endl;
for (Long64_t k=0; k<GetNEvents(); k++)
foam[0]->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
Log() << kINFO << "Build mono target regression foam" << Endl;
foam[0]->SetNElements(1);
foam[0]->Create(fCutNmin);
Log() << kDEBUG << "Resetting cell elements" << Endl;
foam[0]->SetNElements(2);
foam[0]->ResetCellElements();
Log() << "Filling foam cells with events" << Endl;
for (UInt_t k=0; k<GetNEvents(); k++)
foam[0]->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
Log() << kDEBUG << "Calculate cell average targets"<< Endl;
foam[0]->CalcCellTarget();
Log() << kDEBUG << "Check all cells and remove cells with volume 0" << Endl;
foam[0]->CheckCells(true);
}
void TMVA::MethodPDEFoam::TrainMultiTargetRegression()
{
Log() << kDEBUG << "Number of variables: " << Data()->GetNVariables() << Endl;
Log() << kDEBUG << "Number of Targets: " << Data()->GetNTargets() << Endl;
Log() << kDEBUG << "Dimension of foam: " << Data()->GetNVariables()+Data()->GetNTargets() << Endl;
if (fKernel==kLinN)
Log() << kFATAL << "LinNeighbors kernel currently not supported"
<< " for multi target regression" << Endl;
TString foamcaption = "MultiTargetRegressionFoam";
foam[0] = new PDEFoam(foamcaption);
InitFoam(foam[0], kMultiTarget);
Log() << kINFO << "Filling binary search tree of multi target regression foam with events"
<< Endl;
for (Long64_t k=0; k<GetNEvents(); k++)
foam[0]->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
Log() << kINFO << "Build multi target regression foam" << Endl;
foam[0]->SetNElements(1);
foam[0]->Create(fCutNmin);
Log() << kDEBUG << "Resetting cell elements" << Endl;
foam[0]->SetNElements(2);
foam[0]->ResetCellElements();
Log() << kINFO << "Filling foam cells with events" << Endl;
for (UInt_t k=0; k<GetNEvents(); k++)
foam[0]->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
Log() << kDEBUG << "Check all cells and remove cells with volume 0" << Endl;
foam[0]->CheckCells(true);
}
Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err )
{
const Event* ev = GetEvent();
Double_t discr = 0.;
Double_t discr_error = 0.;
if (fSigBgSeparated) {
std::vector<Float_t> xvec = ev->GetValues();
Double_t density_sig = 0.;
Double_t density_bg = 0.;
density_sig = foam[0]->GetCellDensity(xvec, fKernel);
density_bg = foam[1]->GetCellDensity(xvec, fKernel);
if ( (density_sig+density_bg) > 0 )
discr = density_sig/(density_sig+density_bg);
else
discr = 0.5;
Double_t neventsB = foam[1]->GetCellValue(xvec, kNev);
Double_t neventsS = foam[0]->GetCellValue(xvec, kNev);
Double_t scaleB = 1.;
Double_t errorS = TMath::Sqrt(neventsS);
Double_t errorB = TMath::Sqrt(neventsB);
if (neventsS == 0)
errorS = 1.;
if (neventsB == 0)
errorB = 1.;
if ( (neventsS>1e-10) || (neventsB>1e-10) )
discr_error = TMath::Sqrt( TMath::Power ( scaleB*neventsB
/ TMath::Power((neventsS+scaleB*neventsB),2)
* errorS, 2) +
TMath::Power ( (scaleB*neventsS)
/ TMath::Power((neventsS+scaleB*neventsB),2)
* errorB, 2) );
else discr_error = 1.;
if (discr_error < 1e-10) discr_error = 1.;
if (fDiscrErrCut>=0.0 && discr_error > fDiscrErrCut) discr = -1.;
}
else {
std::vector<Float_t> xvec = ev->GetValues();
discr = foam[0]->GetCellDiscr(xvec, fKernel);
discr_error = foam[0]->GetCellValue(xvec, kDiscriminatorError);
if (fDiscrErrCut>=0.0 && discr_error > fDiscrErrCut) discr = -1.;
}
if (err != 0) *err = discr_error;
return discr;
}
void TMVA::MethodPDEFoam::SetXminXmax( TMVA::PDEFoam *pdefoam )
{
if (!pdefoam){
Log() << kFATAL << "Null pointer given!" << Endl;
return;
}
UInt_t num_vars = GetNvar();
if (fMultiTargetRegression)
num_vars += Data()->GetNTargets();
for (UInt_t idim=0; idim<num_vars; idim++) {
Log()<< kDEBUG << "foam: SetXmin[dim="<<idim<<"]: " << Xmin.at(idim) << Endl;
Log()<< kDEBUG << "foam: SetXmax[dim="<<idim<<"]: " << Xmax.at(idim) << Endl;
pdefoam->SetXmin(idim, Xmin.at(idim));
pdefoam->SetXmax(idim, Xmax.at(idim));
}
}
// Configure a freshly created foam with all build-up parameters and the
// variable ranges; must be called before filling/creating the foam.
void TMVA::MethodPDEFoam::InitFoam(TMVA::PDEFoam *pdefoam, EFoamType ft){
   if (!pdefoam){
      Log() << kFATAL << "Null pointer given!" << Endl;
      return;
   }
   // classification foams need to know which class is signal/background
   if (ft==kSeparate || ft==kDiscr){
      pdefoam->SetSignalClass (fSignalClass);
      pdefoam->SetBackgroundClass(fBackgroundClass);
   }
   pdefoam->SetFoamType(ft);
   // multi-target foams span variables AND targets
   if (ft==kMultiTarget)
      pdefoam->SetkDim( Data()->GetNTargets()+Data()->GetNVariables());
   else
      pdefoam->SetkDim( GetNvar());
   // pass on the user-configured build-up parameters
   pdefoam->SetVolumeFraction(fVolFrac);
   pdefoam->SetnCells( fnCells);
   pdefoam->SetnSampl( fnSampl);
   pdefoam->SetnBin( fnBin);
   pdefoam->SetEvPerBin( fEvPerBin);
   pdefoam->CutNmin(fCutNmin);
   pdefoam->SetNmin(fNmin);
   pdefoam->CutRMSmin(fCutRMSmin);
   pdefoam->SetRMSmin(fRMSmin);
   pdefoam->Init();
   SetXminXmax(pdefoam);
}
const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues()
{
if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
fRegressionReturnVal->clear();
const Event* ev = GetEvent();
std::vector<Float_t> vals = ev->GetValues();
if (vals.size() == 0) {
Log() << kWARNING << "<GetRegressionValues> value vector has size 0. " << Endl;
}
if (fMultiTargetRegression) {
std::vector<Float_t> targets = foam[0]->GetProjectedRegValue(vals, fKernel, fTargetSelection);
for(UInt_t i=0; i<(Data()->GetNTargets()); i++)
fRegressionReturnVal->push_back(targets.at(i));
}
else {
fRegressionReturnVal->push_back(foam[0]->GetCellRegValue0(vals, fKernel));
}
Event * evT = new Event(*ev);
for (UInt_t itgt = 0; itgt < evT->GetNTargets(); itgt++) {
evT->SetTarget(itgt, fRegressionReturnVal->at(itgt) );
}
const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
fRegressionReturnVal->clear();
for (UInt_t itgt = 0; itgt < evT->GetNTargets(); itgt++) {
fRegressionReturnVal->push_back( evT2->GetTarget(itgt) );
}
delete evT;
return (*fRegressionReturnVal);
}
// Intentionally empty: PDEFoam has no linear coefficients to print.
void TMVA::MethodPDEFoam::PrintCoefficients( void )
{}
void TMVA::MethodPDEFoam::AddWeightsXMLTo( void* parent ) const
{
void* wght = gTools().xmlengine().NewChild(parent, 0, "Weights");
gTools().AddAttr( wght, "SigBgSeparated", fSigBgSeparated );
gTools().AddAttr( wght, "Frac", fFrac );
gTools().AddAttr( wght, "DiscrErrCut", fDiscrErrCut );
gTools().AddAttr( wght, "VolFrac", fVolFrac );
gTools().AddAttr( wght, "nCells", fnCells );
gTools().AddAttr( wght, "nSampl", fnSampl );
gTools().AddAttr( wght, "nBin", fnBin );
gTools().AddAttr( wght, "EvPerBin", fEvPerBin );
gTools().AddAttr( wght, "Compress", fCompress );
gTools().AddAttr( wght, "DoRegression", DoRegression() );
gTools().AddAttr( wght, "CutNmin", fCutNmin );
gTools().AddAttr( wght, "Nmin", fNmin );
gTools().AddAttr( wght, "CutRMSmin", fCutRMSmin );
gTools().AddAttr( wght, "RMSmin", fRMSmin );
gTools().AddAttr( wght, "Kernel", KernelToUInt(fKernel) );
gTools().AddAttr( wght, "TargetSelection", TargetSelectionToUInt(fTargetSelection) );
void *xmin_wrap;
for (UInt_t i=0; i<Xmin.size(); i++){
xmin_wrap = gTools().xmlengine().NewChild( wght, 0, "Xmin" );
gTools().AddAttr( xmin_wrap, "Index", i );
gTools().AddAttr( xmin_wrap, "Value", Xmin.at(i) );
}
void *xmax_wrap;
for (UInt_t i=0; i<Xmin.size(); i++){
xmax_wrap = gTools().xmlengine().NewChild( wght, 0, "Xmax" );
gTools().AddAttr( xmax_wrap, "Index", i );
gTools().AddAttr( xmax_wrap, "Value", Xmax.at(i) );
}
WriteFoamsToFile();
}
void TMVA::MethodPDEFoam::WriteFoamsToFile() const
{
FillVariableNamesToFoam();
TString rfname( GetWeightFileName() );
rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );
rfname.ReplaceAll( ".xml", "_foams.root" );
TFile *rootFile = 0;
if (fCompress) rootFile = new TFile(rfname, "RECREATE", "foamfile", 9);
else rootFile = new TFile(rfname, "RECREATE");
foam[0]->Write(foam[0]->GetFoamName().Data());
if (!DoRegression() && fSigBgSeparated)
foam[1]->Write(foam[1]->GetFoamName().Data());
rootFile->Close();
Log() << kINFO << "Foams written to file: "
<< gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
}
void TMVA::MethodPDEFoam::ReadWeightsFromStream( istream& istr )
{
istr >> fSigBgSeparated;
istr >> fFrac;
istr >> fDiscrErrCut;
istr >> fVolFrac;
istr >> fnCells;
istr >> fnSampl;
istr >> fnBin;
istr >> fEvPerBin;
istr >> fCompress;
Bool_t regr;
istr >> regr;
SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) );
istr >> fCutNmin;
istr >> fNmin;
istr >> fCutRMSmin;
istr >> fRMSmin;
UInt_t ker = 0;
istr >> ker;
fKernel = UIntToKernel(ker);
UInt_t ts = 0;
istr >> ts;
fTargetSelection = UIntToTargetSelection(ts);
Xmin.clear();
Xmax.clear();
UInt_t kDim = GetNvar();
if (fMultiTargetRegression)
kDim += Data()->GetNTargets();
for (UInt_t i=0; i<kDim; i++) {
Xmin.push_back(0.);
Xmax.push_back(0.);
}
for (UInt_t i=0; i<kDim; i++)
istr >> Xmin.at(i);
for (UInt_t i=0; i<kDim; i++)
istr >> Xmax.at(i);
ReadFoamsFromFile();
}
void TMVA::MethodPDEFoam::ReadWeightsFromXML( void* wghtnode )
{
gTools().ReadAttr( wghtnode, "SigBgSeparated", fSigBgSeparated );
gTools().ReadAttr( wghtnode, "Frac", fFrac );
gTools().ReadAttr( wghtnode, "DiscrErrCut", fDiscrErrCut );
gTools().ReadAttr( wghtnode, "VolFrac", fVolFrac );
gTools().ReadAttr( wghtnode, "nCells", fnCells );
gTools().ReadAttr( wghtnode, "nSampl", fnSampl );
gTools().ReadAttr( wghtnode, "nBin", fnBin );
gTools().ReadAttr( wghtnode, "EvPerBin", fEvPerBin );
gTools().ReadAttr( wghtnode, "Compress", fCompress );
Bool_t regr;
gTools().ReadAttr( wghtnode, "DoRegression", regr );
SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) );
gTools().ReadAttr( wghtnode, "CutNmin", fCutNmin );
gTools().ReadAttr( wghtnode, "Nmin", fNmin );
gTools().ReadAttr( wghtnode, "CutRMSmin", fCutRMSmin );
gTools().ReadAttr( wghtnode, "RMSmin", fRMSmin );
UInt_t ker = 0;
gTools().ReadAttr( wghtnode, "Kernel", ker );
fKernel = UIntToKernel(ker);
UInt_t ts = 0;
gTools().ReadAttr( wghtnode, "TargetSelection", ts );
fTargetSelection = UIntToTargetSelection(ts);
Xmin.clear();
Xmax.clear();
UInt_t kDim = GetNvar();
if (fMultiTargetRegression)
kDim += Data()->GetNTargets();
for (UInt_t i=0; i<kDim; i++) {
Xmin.push_back(0.);
Xmax.push_back(0.);
}
void *xmin_wrap = gTools().xmlengine().GetChild( wghtnode );
for (UInt_t counter=0; counter<kDim; counter++) {
UInt_t i=0;
gTools().ReadAttr( xmin_wrap , "Index", i );
if (i>=kDim)
Log() << kFATAL << "dimension index out of range:" << i << Endl;
gTools().ReadAttr( xmin_wrap , "Value", Xmin.at(i) );
xmin_wrap = gTools().xmlengine().GetNext( xmin_wrap );
}
void *xmax_wrap = xmin_wrap;
for (UInt_t counter=0; counter<kDim; counter++) {
UInt_t i=0;
gTools().ReadAttr( xmax_wrap , "Index", i );
if (i>=kDim)
Log() << kFATAL << "dimension index out of range:" << i << Endl;
gTools().ReadAttr( xmax_wrap , "Value", Xmax.at(i) );
xmax_wrap = gTools().xmlengine().GetNext( xmax_wrap );
}
if (foam[0]) delete foam[0];
if (foam[1]) delete foam[1];
ReadFoamsFromFile();
}
void TMVA::MethodPDEFoam::ReadFoamsFromFile()
{
TString rfname( GetWeightFileName() );
rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );
rfname.ReplaceAll( ".xml", "_foams.root" );
Log() << kINFO << "Read foams from file: " << gTools().Color("lightblue")
<< rfname << gTools().Color("reset") << Endl;
TFile *rootFile = new TFile( rfname, "READ" );
if (rootFile->IsZombie()) Log() << kFATAL << "Cannot open file \"" << rfname << "\"" << Endl;
if (DoRegression()) {
if (fMultiTargetRegression)
foam[0] = dynamic_cast<PDEFoam*>(rootFile->Get("MultiTargetRegressionFoam"));
else
foam[0] = dynamic_cast<PDEFoam*>(rootFile->Get("MonoTargetRegressionFoam"));
}
else {
if (fSigBgSeparated) {
foam[0] = dynamic_cast<PDEFoam*>(rootFile->Get("SignalFoam"));
foam[1] = dynamic_cast<PDEFoam*>(rootFile->Get("BgFoam"));
}
else
foam[0] = dynamic_cast<PDEFoam*>(rootFile->Get("DiscrFoam"));
}
if (!foam[0] || (!DoRegression() && fSigBgSeparated && !foam[1]))
Log() << kFATAL << "Could not load foam!" << Endl;
}
void TMVA::MethodPDEFoam::FillVariableNamesToFoam() const {
UInt_t nfoams=1;
if (fSigBgSeparated && !DoRegression()) nfoams=2;
for (UInt_t ifoam=0; ifoam<nfoams; ifoam++) {
for (Int_t idim=0; idim<foam[ifoam]->GetTotDim(); idim++) {
if(fMultiTargetRegression && (UInt_t)idim>=DataInfo().GetNVariables())
foam[ifoam]->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
else
foam[ifoam]->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
}
}
}
// Intentionally empty: standalone-class code generation is not
// implemented for PDEFoam.
void TMVA::MethodPDEFoam::MakeClassSpecific( std::ostream& , const TString& ) const
{
}
void TMVA::MethodPDEFoam::GetHelpMessage() const
{
Log() << Endl;
Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
Log() << Endl;
Log() << "PDE-Foam is a variation of the PDE-RS method using a self-adapting" << Endl;
Log() << "binning method to divide the multi-dimensional variable space into a" << Endl;
Log() << "finite number of hyper-rectangles (cells). The binning algorithm " << Endl;
Log() << "adjusts the size and position of a predefined number of cells such" << Endl;
Log() << "that the variance of the signal and background densities inside the " << Endl;
Log() << "cells reaches a minimum" << Endl;
Log() << Endl;
Log() << gTools().Color("bold") << "--- Use of booking options:" << gTools().Color("reset") << Endl;
Log() << Endl;
Log() << "The PDEFoam classifier supports two different algorithms: " << Endl;
Log() << Endl;
Log() << " (1) Create one foam, which stores the signal over background" << Endl;
Log() << " probability density. During foam buildup the variance of the" << Endl;
Log() << " discriminant inside the cells is minimised." << Endl;
Log() << Endl;
Log() << " Booking option: SigBgSeparated=F" << Endl;
Log() << Endl;
Log() << " (2) Create two separate foams, one for the signal events and one for" << Endl;
Log() << " background events. During foam buildup the variance of the" << Endl;
Log() << " event density inside the cells is minimised separately for" << Endl;
Log() << " signal and background." << Endl;
Log() << Endl;
Log() << " Booking option: SigBgSeparated=T" << Endl;
Log() << Endl;
Log() << "The following options can be set (the listed values are found to be a" << Endl;
Log() << "good starting point for most applications):" << Endl;
Log() << Endl;
Log() << " SigBgSeparate False Separate Signal and Background" << Endl;
Log() << " TailCut 0.001 Fraction of outlier events that excluded" << Endl;
Log() << " from the foam in each dimension " << Endl;
Log() << " VolFrac 0.0333 Volume fraction (used for density calculation" << Endl;
Log() << " during foam build-up) " << Endl;
Log() << " nActiveCells 500 Maximal number of active cells in final foam " << Endl;
Log() << " nSampl 2000 Number of MC events per cell in foam build-up " << Endl;
Log() << " nBin 5 Number of bins used in foam build-up " << Endl;
Log() << " CutNmin True Requirement for minimal number of events in cell " << Endl;
Log() << " Nmin 100 Number of events in cell required to split cell" << Endl;
Log() << " Kernel None Kernel type used (possible valuses are: None," << Endl;
Log() << " Gauss)" << Endl;
Log() << " Compress True Compress foam output file " << Endl;
Log() << Endl;
Log() << " Additional regression options:" << Endl;
Log() << Endl;
Log() << "MultiTargetRegression False Do regression with multiple targets " << Endl;
Log() << " TargetSelection Mean Target selection method (possible valuses are: " << Endl;
Log() << " Mean, Mpv)" << Endl;
Log() << Endl;
Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
Log() << Endl;
Log() << "The performance of the two implementations was found to be similar for" << Endl;
Log() << "most examples studied. For the same number of cells per foam, the two-" << Endl;
Log() << "foam option approximately doubles the amount of computer memory needed" << Endl;
Log() << "during classification. For special cases where the event-density" << Endl;
Log() << "distribution of signal and background events is very different, the" << Endl;
Log() << "two-foam option was found to perform significantly better than the" << Endl;
Log() << "option with only one foam." << Endl;
Log() << Endl;
Log() << "In order to gain better classification performance we recommend to set" << Endl;
Log() << "the parameter \"nActiveCells\" to a high value." << Endl;
Log() << Endl;
Log() << "The parameter \"VolFrac\" specifies the size of the sampling volume" << Endl;
Log() << "during foam buildup and should be tuned in order to achieve optimal" << Endl;
Log() << "performance. A larger box leads to a reduced statistical uncertainty" << Endl;
Log() << "for small training samples and to smoother sampling. A smaller box on" << Endl;
Log() << "the other hand increases the sensitivity to statistical fluctuations" << Endl;
Log() << "in the training samples, but for sufficiently large training samples" << Endl;
Log() << "it will result in a more precise local estimate of the sampled" << Endl;
Log() << "density. In general, higher dimensional problems require larger box" << Endl;
Log() << "sizes, due to the reduced average number of events per box volume. The" << Endl;
Log() << "default value of 0.0333 was optimised for an example with 5" << Endl;
Log() << "observables and training samples of the order of 50000 signal and" << Endl;
Log() << "background events each." << Endl;
Log() << Endl;
Log() << "Furthermore kernel weighting can be activated, which will lead to an" << Endl;
Log() << "additional performance improvement. Note that Gauss weighting will" << Endl;
Log() << "significantly increase the response time of the method. LinNeighbors" << Endl;
Log() << "weighting performs a linear interpolation with direct neighbor cells" << Endl;
Log() << "for each dimension and is much faster than Gauss weighting." << Endl;
Log() << Endl;
Log() << "The classification results were found to be rather insensitive to the" << Endl;
Log() << "values of the parameters \"nSamples\" and \"nBin\"." << Endl;
}