Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df106_HiggsToFourLeptons.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_dataframe
3/// \notebook -draw
4/// The Higgs to four lepton analysis from the ATLAS Open Data release of 2020, with RDataFrame.
5///
6/// This tutorial is the Higgs to four lepton analysis from the ATLAS Open Data release in 2020
7/// (http://opendata.atlas.cern/release/2020/documentation/). The data was taken with the ATLAS detector
8/// during 2016 at a center-of-mass energy of 13 TeV. The decay of the Standard Model Higgs boson
9/// to two Z bosons and subsequently to four leptons is called the "golden channel". The selection leads
10/// to a narrow invariant mass peak on top a relatively smooth and small background, revealing
11/// the Higgs at 125 GeV.
12/// The analysis is translated to an RDataFrame workflow processing about 300 MB of simulated events and data.
13///
14/// Lepton selection efficiency corrections ("scale factors") are applied to simulated samples to correct for the
15/// differences in the trigger, reconstruction, and identification efficiencies in simulation compared to real data.
16/// Systematic uncertainties for those scale factors are evaluated and the Vary function of RDataFrame is used to
17/// propagate the variations to the final four leptons mass distribution.
18///
19/// See the [corresponding spec json file](https://github.com/root-project/root/blob/master/tutorials/dataframe/df106_HiggsToFourLeptons_spec.json).
20///
21/// \macro_code
22/// \macro_image
23///
24/// \date March 2020, August 2022, August 2023
25/// \authors Stefan Wunsch (KIT, CERN), Julia Mathe (CERN), Marta Czurylo (CERN)
26
27#include "TInterpreter.h"
28#include <Math/Vector4D.h>
29#include <ROOT/RDFHelpers.hxx>
30#include <ROOT/RDataFrame.hxx>
31#include <ROOT/RVec.hxx>
32#include <TCanvas.h>
33#include <TGraph.h>
34#include <TH1D.h>
35#include <THStack.h>
36#include <TLatex.h>
37#include <TLegend.h>
38#include <TProfile.h>
39#include <TStyle.h>
40
41using namespace ROOT::VecOps;
43using ROOT::RVecF;
45using namespace ROOT::RDF::Experimental;
46
47// Define functions needed in the analysis
48// Select events for the analysis
49bool GoodElectronsAndMuons(const ROOT::RVecI &type, const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e,
50 const RVecF &trackd0pv, const RVecF &tracksigd0pv, const RVecF &z0)
51{
52 for (size_t i = 0; i < type.size(); i++) {
53 PtEtaPhiEVectorF p(0.001 * pt[i], eta[i], phi[i], 0.001 * e[i]);
54 if (type[i] == 11) {
55 if (pt[i] < 7000 || abs(eta[i]) > 2.47 || abs(trackd0pv[i] / tracksigd0pv[i]) > 5 ||
56 abs(z0[i] * sin(p.Theta())) > 0.5)
57 return false;
58 } else {
59 if (abs(trackd0pv[i] / tracksigd0pv[i]) > 5 || abs(z0[i] * sin(p.Theta())) > 0.5)
60 return false;
61 }
62 }
63 return true;
64}
65
66// Compute the invariant mass of a four-lepton-system.
67float ComputeInvariantMass(const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e)
68{
69 PtEtaPhiEVectorF p1(pt[0], eta[0], phi[0], e[0]);
70 PtEtaPhiEVectorF p2(pt[1], eta[1], phi[1], e[1]);
71 PtEtaPhiEVectorF p3(pt[2], eta[2], phi[2], e[2]);
72 PtEtaPhiEVectorF p4(pt[3], eta[3], phi[3], e[3]);
73 return 0.001 * (p1 + p2 + p3 + p4).M();
74}
75
77{
78
79 // Enable Multithreading
81
82 // Create the RDataFrame from the spec json file. The df106_HiggsToFourLeptons_spec.json is provided in the same
83 // folder as this tutorial
84 std::string dataset_spec = gROOT->GetTutorialsDir() + std::string("/dataframe/df106_HiggsToFourLeptons_spec.json");
86
87 // Add the ProgressBar feature
89
90#ifndef __CLING__
91 // If this tutorial is compiled, rather than run as a ROOT macro, the interpreter needs to be fed the signatures
92 // of all the functions we want to JIT in our analysis, as well as any type used in those signatures.
93 // clang-format off
94 gInterpreter->Declare(
95 "using ROOT::RVecF;"
96 "bool GoodElectronsAndMuons(const ROOT::RVecI &type, const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e,"
97 "const RVecF &trackd0pv, const RVecF &tracksigd0pv, const RVecF &z0);"
98 "float ComputeInvariantMass(const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e);"
99 );
100 // clang-format on
101#endif
102
103 // Perform the analysis
104 // Access metadata information that is stored in the JSON config file of the RDataFrame
105 // The metadata contained in the JSON file is accessible within a `DefinePerSample` call, through the `RSampleInfo`
106 // class
107 auto df_analysis =
108 df.DefinePerSample("xsecs", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("xsecs"); })
109 .DefinePerSample("lumi", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("lumi"); })
110 .DefinePerSample("sumws", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("sumws"); })
111 .DefinePerSample("sample_category",
112 [](unsigned int slot, const RSampleInfo &id) { return id.GetS("sample_category"); })
113 // Apply an MC correction for the ZZ decay due to missing gg->ZZ process
114 .DefinePerSample("scale",
115 [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
116 return id.Contains("mc_363490.llll.4lep.root") ? 1.3f : 1.0f;
117 })
118 // Select electron or muon trigger
119 .Filter("trigE || trigM")
120 // Select events with exactly four good leptons conserving charge and lepton numbers
121 // Note that all collections are RVecs and good_lep is the mask for the good leptons.
122 // The lepton types are PDG numbers and set to 11 or 13 for an electron or muon
123 // irrespective of the charge.
124 .Define("good_lep",
125 "abs(lep_eta) < 2.5 && lep_pt > 5000 && lep_ptcone30 / lep_pt < 0.3 && lep_etcone20 / lep_pt < 0.3")
126 .Filter("Sum(good_lep) == 4")
127 .Filter("Sum(lep_charge[good_lep]) == 0")
128 .Define("goodlep_sumtypes", "Sum(lep_type[good_lep])")
129 .Filter("goodlep_sumtypes == 44 || goodlep_sumtypes == 52 || goodlep_sumtypes == 48")
130 // Apply additional cuts depending on lepton flavour
131 .Filter(
132 "GoodElectronsAndMuons(lep_type[good_lep], lep_pt[good_lep], lep_eta[good_lep], lep_phi[good_lep], "
133 "lep_E[good_lep], lep_trackd0pvunbiased[good_lep], lep_tracksigd0pvunbiased[good_lep], lep_z0[good_lep])")
134 // Create new columns with the kinematics of good leptons
135 .Define("goodlep_pt", "lep_pt[good_lep]")
136 .Define("goodlep_eta", "lep_eta[good_lep]")
137 .Define("goodlep_phi", "lep_phi[good_lep]")
138 .Define("goodlep_E", "lep_E[good_lep]")
139 .Define("goodlep_type", "lep_type[good_lep]")
140 // Select leptons with high transverse momentum
141 .Filter("goodlep_pt[0] > 25000 && goodlep_pt[1] > 15000 && goodlep_pt[2] > 10000")
142 // Compute invariant mass
143 .Define("m4l", "ComputeInvariantMass(goodlep_pt, goodlep_eta, goodlep_phi, goodlep_E)")
144 // Reweighting of the samples is different for "data" and "MC"
145 .DefinePerSample("reweighting", [](unsigned int slot, const RSampleInfo &id) { return id.Contains("mc"); });
146
147 // Define the weight column (scale factor) for the MC samples
148 auto df_mc = df_analysis.Filter("reweighting == true")
149 .Define("weight", ("scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * "
150 "scaleFactor_PILEUP * mcWeight * scale * xsecs / sumws * lumi"));
151
152 // Book histograms for individual MC samples
153 auto df_higgs = df_mc.Filter(R"(sample_category == "higgs")")
154 .Histo1D<float>(ROOT::RDF::TH1DModel("higgs", "m4l", 24, 80, 170), "m4l", "weight");
155 auto df_zz = df_mc.Filter("sample_category == \"zz\"")
156 .Histo1D<float>(ROOT::RDF::TH1DModel("zz", "m4l", 24, 80, 170), "m4l", "weight");
157 auto df_other = df_mc.Filter("sample_category == \"other\"")
158 .Histo1D<float>(ROOT::RDF::TH1DModel("other", "m4l", 24, 80, 170), "m4l", "weight");
159
160 // Book the invariant mass histogram for the data
161 auto df_h_mass_data = df_analysis.Filter("reweighting == false")
162 .Filter("sample_category == \"data\"")
163 .Define("weight_", []() { return 1; })
164 .Histo1D<float>(ROOT::RDF::TH1DModel("data", "m4l", 24, 80, 170), "m4l", "weight_");
165
166 // Evaluate the systematic uncertainty
167
168 // The systematic uncertainty in this analysis is the MC scale factor uncertainty that depends on lepton
169 // kinematics such as pT or pseudorapidity.
170 // Muons uncertainties are negligible, as stated in https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/MUON
171 // Electrons uncertainties are evaluated based on the plots available in https://doi.org/10.48550/arXiv.1908.0
172 // The uncertainties are linearly interpolated, using the `TGraph::Eval()` method, to cover a range of pT values
173 // covered by the analysis.
174 const std::vector<double> x{5.50e3, 5.52e3, 12.54e3, 17.43e3, 22.40e3, 27.48e3, 30e3, 10000e3};
175 const std::vector<double> y{0.06628, 0.06395, 0.06396, 0.03372, 0.02441, 0.01403, 0, 0};
176 TGraph graph(x.size(), x.data(), y.data());
177
178 // Use the Vary method to add the systematic variations to the total MC scale factor ("weight") of the analysis
179 // The input consists of the input column to be varied and the lambda function to compute the systematic variations.
180 // The new output columns contain the varied values of the input column.
181 auto df_with_variations_mc =
182 df_mc
183 .Vary("weight",
184 [&graph](double x, const RVecF &pt, const RVec<unsigned int> &type) {
185 const auto v = Mean(Map(pt[type == 11], [&graph](auto p) { return graph.Eval(p); }));
186 return RVec<double>{(1 + v) * x, (1 - v) * x};
187 },
188 {"weight", "goodlep_pt", "goodlep_type"}, {"up", "down"})
189 .Histo1D<float>(ROOT::RDF::TH1DModel("Invariant Mass", "m4l", 24, 80, 170), "m4l", "weight");
190
191 // Create the total MC scale factor histograms: "nominal", "weight:up" and "weight:down".
192 auto histos_mc = VariationsFor(df_with_variations_mc);
193
194 // Evaluate the total MC uncertainty based on the variations. Note, in this case the uncertainties are symmetric.
195 for (unsigned int i = 0; i < histos_mc["nominal"].GetXaxis()->GetNbins(); i++) {
196 histos_mc["nominal"].SetBinError(
197 i, (histos_mc["weight:up"].GetBinContent(i) - histos_mc["nominal"].GetBinContent(i)));
198 }
199
200 // Make the plot of the data, individual MC contributions and the total MC scale factor systematic variations.
201 gROOT->SetStyle("ATLAS");
202
203 // Create canvas with pad
204 auto c = new TCanvas("c", " ", 600, 600);
205 auto pad = new TPad("upper_pad", "", 0, 0, 1, 1);
206 pad->SetTickx(0);
207 pad->SetTicky(0);
208 pad->Draw();
209 pad->cd();
210
211 // Draw stack with MC contributions
212 // Draw cloned histograms to preserve graphics when original objects goes out of scope
213 df_other->SetFillColor(kViolet - 9);
214 df_zz->SetFillColor(kAzure - 9);
215 df_higgs->SetFillColor(kRed + 2);
216
217 auto stack = new THStack("stack", "");
218 auto h_other = static_cast<TH1 *>(df_other->Clone());
219 stack->Add(h_other);
220 auto h_zz = static_cast<TH1 *>(df_zz->Clone());
221 stack->Add(h_zz);
222 auto h_higgs = static_cast<TH1 *>(df_higgs->Clone());
223 stack->Add(h_higgs);
224 stack->Draw("HIST");
225
226 // stack histogram can be accessed only after drawing
227 stack->GetHistogram()->SetTitle("");
228 stack->GetHistogram()->GetXaxis()->SetLabelSize(0.035);
229 stack->GetHistogram()->GetXaxis()->SetTitleSize(0.045);
230 stack->GetHistogram()->GetXaxis()->SetTitleOffset(1.3);
231 stack->GetHistogram()->GetXaxis()->SetTitle("m_{4l}^{H#rightarrow ZZ} [GeV]");
232 stack->GetHistogram()->GetYaxis()->SetLabelSize(0.035);
233 stack->GetHistogram()->GetYaxis()->SetTitleSize(0.045);
234 stack->GetHistogram()->GetYaxis()->SetTitle("Events");
235 stack->SetMaximum(35);
236 stack->GetHistogram()->GetYaxis()->ChangeLabel(1, -1, 0);
237
238 // Draw MC scale factor and variations
239 histos_mc["nominal"].SetFillColor(kBlack);
240 histos_mc["nominal"].SetFillStyle(3254);
241 auto h_nominal = histos_mc["nominal"].DrawClone("E2 same");
242 histos_mc["weight:up"].SetLineColor(kGreen + 2);
243 auto h_weight_up = histos_mc["weight:up"].DrawClone("HIST same");
244 histos_mc["weight:down"].SetLineColor(kBlue + 2);
245 auto h_weight_down = histos_mc["weight:down"].DrawClone("HIST same");
246
247 // Draw data histogram
248 df_h_mass_data->SetMarkerStyle(20);
249 df_h_mass_data->SetMarkerSize(1.);
250 df_h_mass_data->SetLineWidth(2);
251 df_h_mass_data->SetLineColor(kBlack);
252 df_h_mass_data->SetStats(false);
253 auto h_mass_data = df_h_mass_data->DrawClone("E sames");
254
255 // Add legend
256 auto legend = new TLegend(0.57, 0.65, 0.94, 0.94);
257 legend->SetTextFont(42);
258 legend->SetFillStyle(0);
259 legend->SetBorderSize(0);
260 legend->SetTextSize(0.025);
261 legend->SetTextAlign(32);
262 legend->AddEntry(h_mass_data, "Data", "lep");
263 legend->AddEntry(h_higgs, "Higgs MC", "f");
264 legend->AddEntry(h_zz, "ZZ MC", "f");
265 legend->AddEntry(h_other, "Other MC", "f");
266 legend->AddEntry(h_weight_down, "Total MC Variations Down", "l");
267 legend->AddEntry(h_weight_up, "Total MC Variations Up", "l");
268 legend->AddEntry(h_nominal, "Total MC Uncertainty", "f");
269 legend->Draw();
270
271 // Add ATLAS label
272 TLatex atlas_label;
273 atlas_label.SetTextFont(70);
274 atlas_label.SetTextSize(0.04);
275 atlas_label.DrawLatexNDC(0.19, 0.85, "ATLAS");
276 TLatex data_label;
277 data_label.SetTextFont(42);
278 data_label.DrawLatexNDC(0.19 + 0.13, 0.85, "Open Data");
279 TLatex header;
280 data_label.SetTextFont(42);
281 header.SetTextSize(0.035);
282 header.DrawLatexNDC(0.21, 0.8, "#sqrt{s} = 13 TeV, 10 fb^{-1}");
283
284 // Save the plot.
285 c->SaveAs("df106_HiggsToFourLeptons_cpp.png");
286 std::cout << "Saved figure to df106_HiggsToFourLeptons_cpp.png" << std::endl;
287}
288
289int main()
290{
292}
int main()
Definition Prototype.cxx:12
#define c(i)
Definition RSha256.hxx:101
#define e(i)
Definition RSha256.hxx:103
@ kRed
Definition Rtypes.h:66
@ kBlack
Definition Rtypes.h:65
@ kGreen
Definition Rtypes.h:66
@ kBlue
Definition Rtypes.h:66
@ kAzure
Definition Rtypes.h:67
@ kViolet
Definition Rtypes.h:67
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
#define gInterpreter
#define gROOT
Definition TROOT.h:406
Class describing a generic LorentzVector in the 4D space-time, using the specified coordinate system ...
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
virtual void SetTextFont(Font_t tfont=62)
Set the text font.
Definition TAttText.h:46
virtual void SetTextSize(Float_t tsize=1)
Set the text size.
Definition TAttText.h:47
The Canvas class.
Definition TCanvas.h:23
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:59
virtual Bool_t Add(TF1 *h1, Double_t c1=1, Option_t *option="")
Performs the operation: this = this + c1*f1 if errors are defined (see TH1::Sumw2),...
Definition TH1.cxx:826
The Histogram stack class.
Definition THStack.h:40
To draw Mathematical Formula.
Definition TLatex.h:18
TLatex * DrawLatexNDC(Double_t x, Double_t y, const char *text)
Draw this TLatex with new coordinates in NDC.
Definition TLatex.cxx:1957
This class displays a legend box (TPaveText) containing several legend entries.
Definition TLegend.h:23
virtual void SaveAs(const char *filename="", Option_t *option="") const
Save this object in the file specified by filename.
Definition TObject.cxx:692
The most important graphics class in the ROOT system.
Definition TPad.h:28
TPaveText * pt
RVec< PromoteType< T > > abs(const RVec< T > &v)
Definition RVec.hxx:1832
auto Map(Args &&... args)
Create new collection applying a callable to the elements of the input collection.
Definition RVec.hxx:2150
RVec< PromoteType< T > > sin(const RVec< T > &v)
Definition RVec.hxx:1851
RVec< T > Filter(const RVec< T > &v, F &&f)
Create a new collection with the elements passing the filter expressed by the predicate.
Definition RVec.hxx:2182
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
ROOT::RDataFrame FromSpec(const std::string &jsonFile)
Factory method to create an RDataFrame from a JSON specification file.
RResultMap< T > VariationsFor(RResultPtr< T > resPtr)
Produce all required systematic variations for the given result.
void AddProgressBar(ROOT::RDF::RNode df)
Add ProgressBar to a ROOT::RDF::RNode.
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:539
Double_t Mean(Long64_t n, const T *a, const Double_t *w=nullptr)
Returns the weighted mean of an array a with length n.
Definition TMath.h:1089
Definition graph.py:1
A struct which stores the parameters of a TH1D.