ROOT logo

From $ROOTSYS/tutorials/roostats/rs301_splot.C

/////////////////////////////////////////////////////////////////////////
//
// SPlot tutorial
// author: Kyle Cranmer
// date Dec. 2008 
//
// This tutorial shows an example of using SPlot to unfold two distributions.
// The physics context for the example is that we want to know 
// the isolation distribution for real electrons from Z events 
// and fake electrons from QCD.  Isolation is our 'control' variable
// To unfold them, we need a model for an uncorrelated variable that
// discriminates between Z and QCD.  To do this, we use the invariant 
// mass of two electrons.  We model the Z with a Gaussian and the QCD
// with a falling exponential.
//
// Note, since we don't have real data in this tutorial, we need to generate
// toy data.  To do that we need a model for the isolation variable for
// both Z and QCD.  This is only used to generate the toy data, and would
// not be needed if we had real data.
/////////////////////////////////////////////////////////////////////////

#ifndef __CINT__
#include "RooGlobalFunc.h"
#endif
#include "RooRealVar.h"
#include "RooStats/SPlot.h"
#include "RooDataSet.h"
#include "RooRealVar.h"
#include "RooGaussian.h"
#include "RooExponential.h"
#include "RooChebychev.h"
#include "RooAddPdf.h"
#include "RooProdPdf.h"
#include "RooAddition.h"
#include "RooProduct.h"
#include "TCanvas.h"
#include "RooAbsPdf.h"
#include "RooFit.h"
#include "RooFitResult.h"
#include "RooWorkspace.h"
#include "RooConstVar.h"

// use this order for safety on library loading
using namespace RooFit ;
using namespace RooStats ;


// see below for implementation
void AddModel(RooWorkspace*);
void AddData(RooWorkspace*);
void DoSPlot(RooWorkspace*);
void MakePlots(RooWorkspace*);

void rs301_splot()
{

  // Create a new workspace to manage the project.
  RooWorkspace* wspace = new RooWorkspace("myWS");

  // add the signal and background models to the workspace.
  // Inside this function you will find a discription our model.
  AddModel(wspace);

  // add some toy data to the workspace
  AddData(wspace);

  // inspect the workspace if you wish
  //  wspace->Print();

  // do sPlot.  
  //This wil make a new dataset with sWeights added for every event.
  DoSPlot(wspace);

  // Make some plots showing the discriminating variable and 
  // the control variable after unfolding.
  MakePlots(wspace);

  // cleanup
  delete wspace;
  
}

 
//____________________________________
void AddModel(RooWorkspace* ws){

  // Make models for signal (Higgs) and background (Z+jets and QCD)
  // In real life, this part requires an intellegent modeling 
  // of signal and background -- this is only an example.  

  // set range of observable
  Double_t lowRange = 00, highRange = 200;

  // make a RooRealVar for the observables
  RooRealVar invMass("invMass", "M_{inv}", lowRange, highRange,"GeV");
  RooRealVar isolation("isolation", "isolation", 0., 20., "GeV");
 

  /////////////////////////////////////////////
  // make 2-d model for Z including the invariant mass 
  // distribution  and an isolation distribution which we want to
  // unfold from QCD.
  std::cout << "make z model" << std::endl;
  // mass model for Z
  RooRealVar mZ("mZ", "Z Mass", 91.2, lowRange, highRange);
  RooRealVar sigmaZ("sigmaZ", "Width of Gaussian", 2,0,10,"GeV");
  RooGaussian mZModel("mZModel", "Z+jets Model", invMass, mZ, sigmaZ);
  // we know Z mass
  mZ.setConstant();
  // we leave the width of the Z free during the fit in this example.

  // isolation model for Z.  Only used to generate toy MC.
  // the exponential is of the form exp(c*x).  If we want
  // the isolation to decay an e-fold every R GeV, we use 
  // c = -1/R.
  RooConstVar zIsolDecayConst("zIsolDecayConst",
			      "z isolation decay  constant", -1);
  RooExponential zIsolationModel("zIsolationModel", "z isolation model",
				 isolation, zIsolDecayConst);

  // make the combined Z model
  RooProdPdf zModel("zModel", "4-d model for Z", 
		    RooArgSet(mZModel, zIsolationModel));

  //////////////////////////////////////////////
  // make QCD model

  std::cout << "make qcd model" << std::endl;
  // mass model for QCD.  
  // the exponential is of the form exp(c*x).  If we want
  // the mass to decay an e-fold every R GeV, we use 
  // c = -1/R.
  // We can leave this parameter free during the fit.
  RooRealVar qcdMassDecayConst("qcdMassDecayConst",
			       "Decay const for QCD mass spectrum", 
			       -0.01, -100, 100,"1/GeV");
  RooExponential qcdMassModel("qcdMassModel", "qcd Mass Model",
			      invMass, qcdMassDecayConst);


  // isolation model for QCD.  Only used to generate toy MC
  // the exponential is of the form exp(c*x).  If we want
  // the isolation to decay an e-fold every R GeV, we use 
  // c = -1/R.
  RooConstVar qcdIsolDecayConst("qcdIsolDecayConst",
			      "Et resolution constant", -.1);
  RooExponential qcdIsolationModel("qcdIsolationModel", "QCD isolation model",
				   isolation, qcdIsolDecayConst);

  // make the 2-d model
  RooProdPdf qcdModel("qcdModel", "2-d model for QCD", 
    RooArgSet(qcdMassModel, qcdIsolationModel));

  //////////////////////////////////////////////
  // combined model

  // These variables represent the number of Z or QCD events
  // They will be fitted.
  RooRealVar zYield("zYield","fitted yield for Z",50 ,0.,1000) ; 
  RooRealVar qcdYield("qcdYield","fitted yield for QCD", 100 ,0.,1000) ; 

  // now make the combined model
  std::cout << "make full model" << std::endl;
  RooAddPdf model("model","z+qcd background models",
		  RooArgList(zModel, qcdModel), 
		  RooArgList(zYield,qcdYield));


  // interesting for debugging and visualizing the model
  model.graphVizTree("fullModel.dot");

  std::cout << "import model" << std::endl;

  ws->import(model);
}

//____________________________________
void AddData(RooWorkspace* ws){
  // Add a toy dataset

  // how many events do we want?
  Int_t nEvents = 1000;

  // get what we need out of the workspace to make toy data
  RooAbsPdf* model = ws->pdf("model");
  RooRealVar* invMass = ws->var("invMass");
  RooRealVar* isolation = ws->var("isolation");
 
  // make the toy data
  std::cout << "make data set and import to workspace" << std::endl;
  RooDataSet* data = model->generate(RooArgSet(*invMass, *isolation),nEvents);
  
  // import data into workspace
  ws->import(*data, Rename("data"));

}

//____________________________________
void DoSPlot(RooWorkspace* ws){
  std::cout << "Calculate sWeights" << std::endl;

  // get what we need out of the workspace to do the fit
  RooAbsPdf* model = ws->pdf("model");
  RooRealVar* zYield = ws->var("zYield");
  RooRealVar* qcdYield = ws->var("qcdYield");
  RooDataSet* data = (RooDataSet*) ws->data("data");

  // fit the model to the data.
  model->fitTo(*data, Extended() );

  // The sPlot technique requires that we fix the parameters
  // of the model that are not yields after doing the fit.
  RooRealVar* sigmaZ = ws->var("sigmaZ");
  RooRealVar* qcdMassDecayConst = ws->var("qcdMassDecayConst");  
  sigmaZ->setConstant();
  qcdMassDecayConst->setConstant();


  RooMsgService::instance().setSilentMode(true);


  // Now we use the SPlot class to add SWeights to our data set
  // based on our model and our yield variables
  RooStats::SPlot* sData = new RooStats::SPlot("sData","An SPlot",
		            *data, model, RooArgList(*zYield,*qcdYield) );


  // Check that our weights have the desired properties

  std::cout << "Check SWeights:" << std::endl;


  std::cout << std::endl <<  "Yield of Z is " 
	    << zYield->getVal() << ".  From sWeights it is "
	    << sData->GetYieldFromSWeight("zYield") << std::endl;


  std::cout << "Yield of QCD is " 
	    << qcdYield->getVal() << ".  From sWeights it is "
	    << sData->GetYieldFromSWeight("qcdYield") << std::endl
	    << std::endl;

  for(Int_t i=0; i < 10; i++)
    {
      std::cout << "z Weight   " << sData->GetSWeight(i,"zYield") 
		<< "   qcd Weight   " << sData->GetSWeight(i,"qcdYield") 
		<< "  Total Weight   " << sData->GetSumOfEventSWeight(i) 
		<< std::endl;
    }

  std::cout << std::endl;

  // import this new dataset with sWeights
 std::cout << "import new dataset with sWeights" << std::endl;
 ws->import(*data, Rename("dataWithSWeights"));


}

void MakePlots(RooWorkspace* ws){

  // Here we make plots of the discriminating variable (invMass) after the fit
  // and of the control variable (isolation) after unfolding with sPlot.
  std::cout << "make plots" << std::endl;

  // make our canvas
  TCanvas* cdata = new TCanvas("sPlot","sPlot demo", 400, 600);
  cdata->Divide(1,3);

  // get what we need out of the workspace
  RooAbsPdf* model = ws->pdf("model");
  RooAbsPdf* zModel = ws->pdf("zModel");
  RooAbsPdf* qcdModel = ws->pdf("qcdModel");

  RooRealVar* isolation = ws->var("isolation");
  RooRealVar* invMass = ws->var("invMass");

  // note, we get the dataset with sWeights
  RooDataSet* data = (RooDataSet*) ws->data("dataWithSWeights");

  // this shouldn't be necessary, need to fix something with workspace
  // do this to set parameters back to their fitted values.
  model->fitTo(*data, Extended() );

  //plot invMass for data with full model and individual componenets overlayed
  //  TCanvas* cdata = new TCanvas();
  cdata->cd(1);
  RooPlot* frame = invMass->frame() ; 
  data->plotOn(frame ) ; 
  model->plotOn(frame) ;   
  model->plotOn(frame,Components(*zModel),LineStyle(kDashed), LineColor(kRed)) ;   
  model->plotOn(frame,Components(*qcdModel),LineStyle(kDashed),LineColor(kGreen)) ;   
    
  frame->SetTitle("Fit of model to discriminating variable");
  frame->Draw() ;
 

  // Now use the sWeights to show isolation distribution for Z and QCD.  
  // The SPlot class can make this easier, but here we demonstrait in more
  // detail how the sWeights are used.  The SPlot class should make this 
  // very easy and needs some more development.

  // Plot isolation for Z component.  
  // Do this by plotting all events weighted by the sWeight for the Z component.
  // The SPlot class adds a new variable that has the name of the corresponding
  // yield + "_sw".
  cdata->cd(2);

  // create weightfed data set 
  RooDataSet * dataw_z = new RooDataSet(data->GetName(),data->GetTitle(),data,*data->get(),0,"zYield_sw") ;

  RooPlot* frame2 = isolation->frame() ; 
  dataw_z->plotOn(frame2, DataError(RooAbsData::SumW2) ) ; 
    
  frame2->SetTitle("isolation distribution for Z");
  frame2->Draw() ;

  // Plot isolation for QCD component.  
  // Eg. plot all events weighted by the sWeight for the QCD component.
  // The SPlot class adds a new variable that has the name of the corresponding
  // yield + "_sw".
  cdata->cd(3);
  RooDataSet * dataw_qcd = new RooDataSet(data->GetName(),data->GetTitle(),data,*data->get(),0,"qcdYield_sw") ;
  RooPlot* frame3 = isolation->frame() ; 
  dataw_qcd->plotOn(frame3,DataError(RooAbsData::SumW2) ) ; 
    
  frame3->SetTitle("isolation distribution for QCD");
  frame3->Draw() ;

  //  cdata->SaveAs("SPlot.gif");

}
 rs301_splot.C:1
 rs301_splot.C:2
 rs301_splot.C:3
 rs301_splot.C:4
 rs301_splot.C:5
 rs301_splot.C:6
 rs301_splot.C:7
 rs301_splot.C:8
 rs301_splot.C:9
 rs301_splot.C:10
 rs301_splot.C:11
 rs301_splot.C:12
 rs301_splot.C:13
 rs301_splot.C:14
 rs301_splot.C:15
 rs301_splot.C:16
 rs301_splot.C:17
 rs301_splot.C:18
 rs301_splot.C:19
 rs301_splot.C:20
 rs301_splot.C:21
 rs301_splot.C:22
 rs301_splot.C:23
 rs301_splot.C:24
 rs301_splot.C:25
 rs301_splot.C:26
 rs301_splot.C:27
 rs301_splot.C:28
 rs301_splot.C:29
 rs301_splot.C:30
 rs301_splot.C:31
 rs301_splot.C:32
 rs301_splot.C:33
 rs301_splot.C:34
 rs301_splot.C:35
 rs301_splot.C:36
 rs301_splot.C:37
 rs301_splot.C:38
 rs301_splot.C:39
 rs301_splot.C:40
 rs301_splot.C:41
 rs301_splot.C:42
 rs301_splot.C:43
 rs301_splot.C:44
 rs301_splot.C:45
 rs301_splot.C:46
 rs301_splot.C:47
 rs301_splot.C:48
 rs301_splot.C:49
 rs301_splot.C:50
 rs301_splot.C:51
 rs301_splot.C:52
 rs301_splot.C:53
 rs301_splot.C:54
 rs301_splot.C:55
 rs301_splot.C:56
 rs301_splot.C:57
 rs301_splot.C:58
 rs301_splot.C:59
 rs301_splot.C:60
 rs301_splot.C:61
 rs301_splot.C:62
 rs301_splot.C:63
 rs301_splot.C:64
 rs301_splot.C:65
 rs301_splot.C:66
 rs301_splot.C:67
 rs301_splot.C:68
 rs301_splot.C:69
 rs301_splot.C:70
 rs301_splot.C:71
 rs301_splot.C:72
 rs301_splot.C:73
 rs301_splot.C:74
 rs301_splot.C:75
 rs301_splot.C:76
 rs301_splot.C:77
 rs301_splot.C:78
 rs301_splot.C:79
 rs301_splot.C:80
 rs301_splot.C:81
 rs301_splot.C:82
 rs301_splot.C:83
 rs301_splot.C:84
 rs301_splot.C:85
 rs301_splot.C:86
 rs301_splot.C:87
 rs301_splot.C:88
 rs301_splot.C:89
 rs301_splot.C:90
 rs301_splot.C:91
 rs301_splot.C:92
 rs301_splot.C:93
 rs301_splot.C:94
 rs301_splot.C:95
 rs301_splot.C:96
 rs301_splot.C:97
 rs301_splot.C:98
 rs301_splot.C:99
 rs301_splot.C:100
 rs301_splot.C:101
 rs301_splot.C:102
 rs301_splot.C:103
 rs301_splot.C:104
 rs301_splot.C:105
 rs301_splot.C:106
 rs301_splot.C:107
 rs301_splot.C:108
 rs301_splot.C:109
 rs301_splot.C:110
 rs301_splot.C:111
 rs301_splot.C:112
 rs301_splot.C:113
 rs301_splot.C:114
 rs301_splot.C:115
 rs301_splot.C:116
 rs301_splot.C:117
 rs301_splot.C:118
 rs301_splot.C:119
 rs301_splot.C:120
 rs301_splot.C:121
 rs301_splot.C:122
 rs301_splot.C:123
 rs301_splot.C:124
 rs301_splot.C:125
 rs301_splot.C:126
 rs301_splot.C:127
 rs301_splot.C:128
 rs301_splot.C:129
 rs301_splot.C:130
 rs301_splot.C:131
 rs301_splot.C:132
 rs301_splot.C:133
 rs301_splot.C:134
 rs301_splot.C:135
 rs301_splot.C:136
 rs301_splot.C:137
 rs301_splot.C:138
 rs301_splot.C:139
 rs301_splot.C:140
 rs301_splot.C:141
 rs301_splot.C:142
 rs301_splot.C:143
 rs301_splot.C:144
 rs301_splot.C:145
 rs301_splot.C:146
 rs301_splot.C:147
 rs301_splot.C:148
 rs301_splot.C:149
 rs301_splot.C:150
 rs301_splot.C:151
 rs301_splot.C:152
 rs301_splot.C:153
 rs301_splot.C:154
 rs301_splot.C:155
 rs301_splot.C:156
 rs301_splot.C:157
 rs301_splot.C:158
 rs301_splot.C:159
 rs301_splot.C:160
 rs301_splot.C:161
 rs301_splot.C:162
 rs301_splot.C:163
 rs301_splot.C:164
 rs301_splot.C:165
 rs301_splot.C:166
 rs301_splot.C:167
 rs301_splot.C:168
 rs301_splot.C:169
 rs301_splot.C:170
 rs301_splot.C:171
 rs301_splot.C:172
 rs301_splot.C:173
 rs301_splot.C:174
 rs301_splot.C:175
 rs301_splot.C:176
 rs301_splot.C:177
 rs301_splot.C:178
 rs301_splot.C:179
 rs301_splot.C:180
 rs301_splot.C:181
 rs301_splot.C:182
 rs301_splot.C:183
 rs301_splot.C:184
 rs301_splot.C:185
 rs301_splot.C:186
 rs301_splot.C:187
 rs301_splot.C:188
 rs301_splot.C:189
 rs301_splot.C:190
 rs301_splot.C:191
 rs301_splot.C:192
 rs301_splot.C:193
 rs301_splot.C:194
 rs301_splot.C:195
 rs301_splot.C:196
 rs301_splot.C:197
 rs301_splot.C:198
 rs301_splot.C:199
 rs301_splot.C:200
 rs301_splot.C:201
 rs301_splot.C:202
 rs301_splot.C:203
 rs301_splot.C:204
 rs301_splot.C:205
 rs301_splot.C:206
 rs301_splot.C:207
 rs301_splot.C:208
 rs301_splot.C:209
 rs301_splot.C:210
 rs301_splot.C:211
 rs301_splot.C:212
 rs301_splot.C:213
 rs301_splot.C:214
 rs301_splot.C:215
 rs301_splot.C:216
 rs301_splot.C:217
 rs301_splot.C:218
 rs301_splot.C:219
 rs301_splot.C:220
 rs301_splot.C:221
 rs301_splot.C:222
 rs301_splot.C:223
 rs301_splot.C:224
 rs301_splot.C:225
 rs301_splot.C:226
 rs301_splot.C:227
 rs301_splot.C:228
 rs301_splot.C:229
 rs301_splot.C:230
 rs301_splot.C:231
 rs301_splot.C:232
 rs301_splot.C:233
 rs301_splot.C:234
 rs301_splot.C:235
 rs301_splot.C:236
 rs301_splot.C:237
 rs301_splot.C:238
 rs301_splot.C:239
 rs301_splot.C:240
 rs301_splot.C:241
 rs301_splot.C:242
 rs301_splot.C:243
 rs301_splot.C:244
 rs301_splot.C:245
 rs301_splot.C:246
 rs301_splot.C:247
 rs301_splot.C:248
 rs301_splot.C:249
 rs301_splot.C:250
 rs301_splot.C:251
 rs301_splot.C:252
 rs301_splot.C:253
 rs301_splot.C:254
 rs301_splot.C:255
 rs301_splot.C:256
 rs301_splot.C:257
 rs301_splot.C:258
 rs301_splot.C:259
 rs301_splot.C:260
 rs301_splot.C:261
 rs301_splot.C:262
 rs301_splot.C:263
 rs301_splot.C:264
 rs301_splot.C:265
 rs301_splot.C:266
 rs301_splot.C:267
 rs301_splot.C:268
 rs301_splot.C:269
 rs301_splot.C:270
 rs301_splot.C:271
 rs301_splot.C:272
 rs301_splot.C:273
 rs301_splot.C:274
 rs301_splot.C:275
 rs301_splot.C:276
 rs301_splot.C:277
 rs301_splot.C:278
 rs301_splot.C:279
 rs301_splot.C:280
 rs301_splot.C:281
 rs301_splot.C:282
 rs301_splot.C:283
 rs301_splot.C:284
 rs301_splot.C:285
 rs301_splot.C:286
 rs301_splot.C:287
 rs301_splot.C:288
 rs301_splot.C:289
 rs301_splot.C:290
 rs301_splot.C:291
 rs301_splot.C:292
 rs301_splot.C:293
 rs301_splot.C:294
 rs301_splot.C:295
 rs301_splot.C:296
 rs301_splot.C:297
 rs301_splot.C:298
 rs301_splot.C:299
 rs301_splot.C:300
 rs301_splot.C:301
 rs301_splot.C:302
 rs301_splot.C:303
 rs301_splot.C:304
 rs301_splot.C:305
 rs301_splot.C:306
 rs301_splot.C:307
 rs301_splot.C:308
 rs301_splot.C:309
 rs301_splot.C:310
 rs301_splot.C:311
 rs301_splot.C:312
 rs301_splot.C:313
 rs301_splot.C:314
 rs301_splot.C:315
 rs301_splot.C:316
 rs301_splot.C:317
 rs301_splot.C:318
 rs301_splot.C:319
 rs301_splot.C:320
 rs301_splot.C:321
 rs301_splot.C:322
 rs301_splot.C:323
 rs301_splot.C:324
 rs301_splot.C:325
 rs301_splot.C:326
 rs301_splot.C:327
 rs301_splot.C:328
 rs301_splot.C:329
 rs301_splot.C:330
 rs301_splot.C:331
 rs301_splot.C:332
 rs301_splot.C:333