// This class calculates sWeights used to create an sPlot.
// The code is based on
// ``SPlot: A statistical tool to unfold data distributions,''
// Nucl. Instrum. Meth. A 555, 356 (2005)
// [arXiv:physics/0402083].
//
// An SPlot gives us the distribution of some variable, x in our
// data sample for a given species (eg. signal or background).
// The result is similar to a likelihood projection plot, but no cuts are made,
// so every event contributes to the distribution.
//
// [Usage]
// To use this class, you first must have a pdf that includes
// yields for (possibly several) different species.
// Create an instance of the class by supplying a data set,
// the pdf, and a list of the yield variables. The SPlot Class
// will calculate SWeights and include these as columns in the RooDataSet.
//END_HTML
#include <vector>
#include <map>
#include "RooStats/SPlot.h"
#include "RooAbsPdf.h"
#include "RooDataSet.h"
#include "RooRealVar.h"
#include "RooGlobalFunc.h"
#include "TTree.h"
#include "RooStats/RooStatsUtils.h"
#include "TMatrixD.h"
ClassImp(RooStats::SPlot) ;
using namespace RooStats;
SPlot::~SPlot()
{
if(fSData)
delete fSData;
}
SPlot::SPlot():
TNamed()
{
RooArgList Args;
fSWeightVars = Args;
fSData = NULL;
}
SPlot::SPlot(const char* name, const char* title):
TNamed(name, title)
{
RooArgList Args;
fSWeightVars = Args;
fSData = NULL;
}
SPlot::SPlot(const char* name, const char* title, const RooDataSet &data):
TNamed(name, title)
{
RooArgList Args;
fSWeightVars = Args;
fSData = (RooDataSet*) &data;
}
SPlot::SPlot(const SPlot &other):
TNamed(other)
{
RooArgList Args = (RooArgList) other.GetSWeightVars();
fSWeightVars.addClone(Args);
fSData = (RooDataSet*) other.GetSDataSet();
}
SPlot::SPlot(const char* name, const char* title, RooDataSet& data, RooAbsPdf* pdf,
const RooArgList &yieldsList, const RooArgSet &projDeps,
bool includeWeights, bool cloneData, const char* newName):
TNamed(name, title)
{
if(cloneData == 1)
fSData = (RooDataSet*) data.Clone(newName);
else
fSData = (RooDataSet*) &data;
this->AddSWeight(pdf, yieldsList, projDeps, includeWeights);
}
RooDataSet* SPlot::SetSData(RooDataSet* data)
{
if(data) {
fSData = (RooDataSet*) data;
return fSData;
} else
return NULL;
}
RooDataSet* SPlot::GetSDataSet() const
{
return fSData;
}
Double_t SPlot::GetSWeight(Int_t numEvent, const char* sVariable) const
{
if(numEvent > fSData->numEntries() )
{
coutE(InputArguments) << "Invalid Entry Number" << endl;
return -1;
}
if(numEvent < 0)
{
coutE(InputArguments) << "Invalid Entry Number" << endl;
return -1;
}
Double_t totalYield = 0;
std::string varname(sVariable);
varname += "_sw";
if(fSWeightVars.find(sVariable) )
{
RooArgSet Row(*fSData->get(numEvent));
totalYield += Row.getRealValue(sVariable);
return totalYield;
}
if( fSWeightVars.find(varname.c_str()) )
{
RooArgSet Row(*fSData->get(numEvent));
totalYield += Row.getRealValue(varname.c_str() );
return totalYield;
}
else
coutE(InputArguments) << "InputVariable not in list of sWeighted variables" << endl;
return -1;
}
Double_t SPlot::GetSumOfEventSWeight(Int_t numEvent) const
{
if(numEvent > fSData->numEntries() )
{
coutE(InputArguments) << "Invalid Entry Number" << endl;
return -1;
}
if(numEvent < 0)
{
coutE(InputArguments) << "Invalid Entry Number" << endl;
return -1;
}
Int_t numSWeightVars = this->GetNumSWeightVars();
Double_t eventSWeight = 0;
RooArgSet Row(*fSData->get(numEvent));
for (Int_t i = 0; i < numSWeightVars; i++)
eventSWeight += Row.getRealValue(fSWeightVars.at(i)->GetName() );
return eventSWeight;
}
Double_t SPlot::GetYieldFromSWeight(const char* sVariable) const
{
Double_t totalYield = 0;
std::string varname(sVariable);
varname += "_sw";
if(fSWeightVars.find(sVariable) )
{
for(Int_t i=0; i < fSData->numEntries(); i++)
{
RooArgSet Row(*fSData->get(i));
totalYield += Row.getRealValue(sVariable);
}
return totalYield;
}
if( fSWeightVars.find(varname.c_str()) )
{
for(Int_t i=0; i < fSData->numEntries(); i++)
{
RooArgSet Row(*fSData->get(i));
totalYield += Row.getRealValue(varname.c_str() );
}
return totalYield;
}
else
coutE(InputArguments) << "InputVariable not in list of sWeighted variables" << endl;
return -1;
}
RooArgList SPlot::GetSWeightVars() const
{
RooArgList Args = fSWeightVars;
return Args;
}
Int_t SPlot::GetNumSWeightVars() const
{
RooArgList Args = fSWeightVars;
return Args.getSize();
}
void SPlot::AddSWeight( RooAbsPdf* pdf, const RooArgList &yieldsTmp,
const RooArgSet &projDeps, bool includeWeights)
{
RooFit::MsgLevel currentLevel = RooMsgService::instance().globalKillBelow();
RooArgList* constParameters = (RooArgList*)pdf->getParameters(fSData) ;
constParameters->remove(yieldsTmp, kTRUE, kTRUE);
std::vector<RooRealVar*> constVarHolder;
for(Int_t i = 0; i < constParameters->getSize(); i++)
{
RooRealVar* varTemp = ( dynamic_cast<RooRealVar*>( constParameters->at(i) ) );
if( varTemp->isConstant() == 0 )
{
varTemp->setConstant();
constVarHolder.push_back(varTemp);
}
}
pdf->fitTo(*fSData, RooFit::Extended(kTRUE), RooFit::SumW2Error(kTRUE), RooFit::PrintLevel(-1), RooFit::PrintEvalErrors(-1) );
std::vector<double> yieldsHolder;
for(Int_t i = 0; i < yieldsTmp.getSize(); i++)
yieldsHolder.push_back( ((RooRealVar*) yieldsTmp.at(i))->getVal());
Int_t nspec = yieldsTmp.getSize();
RooArgList yields = *(RooArgList*)yieldsTmp.snapshot(kFALSE);
if(currentLevel <= RooFit::DEBUG)
{
coutI(InputArguments) << "Printing Yields" << endl;
yields.Print();
}
RooArgSet vars(*fSData->get() );
vars.remove(projDeps, kTRUE, kTRUE);
pdf->attachDataSet(*fSData);
std::vector<RooRealVar*> yieldvars ;
RooArgSet* parameters = pdf->getParameters(fSData) ;
std::vector<Double_t> yieldvalues ;
for (Int_t k = 0; k < nspec; ++k)
{
RooRealVar* thisyield = dynamic_cast<RooRealVar*>(yields.at(k)) ;
RooRealVar* yieldinpdf = dynamic_cast<RooRealVar*>(parameters->find(thisyield->GetName() )) ;
coutI(InputArguments)<< "yield in pdf: " << yieldinpdf->GetName() << " " << thisyield->getVal() << endl;
yieldvars.push_back(yieldinpdf) ;
yieldvalues.push_back(thisyield->getVal()) ;
}
Int_t numevents = fSData->numEntries() ;
std::vector<std::vector<Double_t> > pdfvalues(numevents,std::vector<Double_t>(nspec,0)) ;
for(Int_t m=0; m<nspec; ++m) yieldvars[m]->setVal(0) ;
for (Int_t ievt = 0; ievt <numevents; ievt++)
{
RooStats::SetParameters(fSData->get(ievt), pdf->getVariables());
for(Int_t k = 0; k < nspec; ++k)
{
if(yieldvars[k]->getMin() > 0)
{
coutW(InputArguments) << "Minimum Range for " << yieldvars[k]->GetName() << " must be 0. ";
coutW(InputArguments) << "Setting min range to 0" << std::endl;
yieldvars[k]->setMin(0);
}
if(yieldvars[k]->getMax() < 1)
{
coutW(InputArguments) << "Maximum Range for " << yieldvars[k]->GetName() << " must be 1. ";
coutW(InputArguments) << "Setting max range to 1" << std::endl;
yieldvars[k]->setMax(1);
}
yieldvars[k]->setVal( 1 ) ;
Double_t f_k = pdf->getVal(&vars) ;
pdfvalues[ievt][k] = f_k ;
if( !(f_k>1 || f_k<1) )
coutW(InputArguments) << "Strange pdf value: " << ievt << " " << k << " " << f_k << std::endl ;
yieldvars[k]->setVal( 0 ) ;
}
}
std::vector<Double_t> norm(nspec,0) ;
for (Int_t ievt = 0; ievt <numevents ; ievt++)
{
Double_t dnorm(0) ;
for(Int_t k=0; k<nspec; ++k) dnorm += yieldvalues[k] * pdfvalues[ievt][k] ;
for(Int_t j=0; j<nspec; ++j) norm[j] += pdfvalues[ievt][j]/dnorm ;
}
coutI(Contents) << "likelihood norms: " ;
for(Int_t k=0; k<nspec; ++k) coutI(Contents) << norm[k] << " " ;
coutI(Contents) << std::endl ;
TMatrixD covInv(nspec, nspec);
for (Int_t i = 0; i < nspec; i++) for (Int_t j = 0; j < nspec; j++) covInv(i,j) = 0;
coutI(Contents) << "Calculating covariance matrix";
for (Int_t ievt = 0; ievt < numevents; ++ievt)
{
fSData->get(ievt) ;
Double_t dsum(0);
for(Int_t k = 0; k < nspec; ++k)
dsum += pdfvalues[ievt][k] * yieldvalues[k] ;
for(Int_t n=0; n<nspec; ++n)
for(Int_t j=0; j<nspec; ++j)
{
if(includeWeights == kTRUE)
covInv(n,j) += fSData->weight()*pdfvalues[ievt][n]*pdfvalues[ievt][j]/(dsum*dsum) ;
else
covInv(n,j) += pdfvalues[ievt][n]*pdfvalues[ievt][j]/(dsum*dsum) ;
}
}
if (covInv.Determinant() <=0)
{
coutE(Eval) << "SPlot Error: covariance matrix is singular; I can't invert it!" << std::endl;
covInv.Print();
return;
}
TMatrixD covMatrix(TMatrixD::kInverted,covInv);
if(currentLevel <= RooFit::DEBUG)
{
coutI(Eval) << "Checking Likelihood normalization: " << std::endl;
coutI(Eval) << "Yield of specie Sum of Row in Matrix Norm" << std::endl;
for(Int_t k=0; k<nspec; ++k)
{
Double_t covnorm(0) ;
for(Int_t m=0; m<nspec; ++m) covnorm += covInv[k][m]*yieldvalues[m] ;
Double_t sumrow(0) ;
for(Int_t m = 0; m < nspec; ++m) sumrow += covMatrix[k][m] ;
coutI(Eval) << yieldvalues[k] << " " << sumrow << " " << covnorm << endl ;
}
}
coutI(Eval) << "Calculating sWeight" << std::endl;
std::vector<RooRealVar*> sweightvec ;
std::vector<RooRealVar*> pdfvec ;
RooArgSet sweightset ;
char wname[256] ;
fSWeightVars.Clear();
for(Int_t k=0; k<nspec; ++k)
{
sprintf(wname,"%s_sw", yieldvars[k]->GetName()) ;
RooRealVar* var = new RooRealVar(wname,wname,0) ;
sweightvec.push_back( var) ;
sweightset.add(*var) ;
fSWeightVars.add(*var);
sprintf(wname,"L_%s", yieldvars[k]->GetName()) ;
var = new RooRealVar(wname,wname,0) ;
pdfvec.push_back( var) ;
sweightset.add(*var) ;
}
RooDataSet* sWeightData = new RooDataSet("dataset", "dataset with sWeights", sweightset);
for(Int_t ievt = 0; ievt < numevents; ++ievt)
{
fSData->get(ievt) ;
Double_t dsum(0);
for(Int_t k = 0; k < nspec; ++k) dsum += pdfvalues[ievt][k] * yieldvalues[k] ;
for(Int_t n=0; n<nspec; ++n)
{
Double_t nsum(0) ;
for(Int_t j=0; j<nspec; ++j) nsum += covMatrix(n,j) * pdfvalues[ievt][j] ;
if(includeWeights == kTRUE) sweightvec[n]->setVal(fSData->weight() * nsum/dsum) ;
else sweightvec[n]->setVal( nsum/dsum) ;
pdfvec[n]->setVal( pdfvalues[ievt][n] ) ;
if( !(fabs(nsum/dsum)>=0 ) )
{
coutE(Contents) << "error: " << nsum/dsum << endl ;
return;
}
}
sWeightData->add(sweightset) ;
}
fSData->merge(sWeightData);
for(Int_t i = 0; i < yieldsTmp.getSize(); i++)
((RooRealVar*) yieldsTmp.at(i))->setVal(yieldsHolder.at(i));
for(Int_t i=0; i < (Int_t) constVarHolder.size(); i++)
constVarHolder.at(i)->setConstant(kFALSE);
return;
}