Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df106_HiggsToFourLeptons.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_dataframe
3## \notebook -draw
4## The Higgs to four lepton analysis from the ATLAS Open Data release of 2020, with RDataFrame.
5##
6## This tutorial is the Higgs to four lepton analysis from the ATLAS Open Data release in 2020
7## (http://opendata.atlas.cern/release/2020/documentation/). The data was taken with the ATLAS detector
8## during 2016 at a center-of-mass energy of 13 TeV. The decay of the Standard Model Higgs boson
9## to two Z bosons and subsequently to four leptons is called the "golden channel". The selection leads
10## to a narrow invariant mass peak on top a relatively smooth and small background, revealing
11## the Higgs at 125 GeV.
12##
13## The analysis is translated to a RDataFrame workflow processing about 300 MB of simulated events and data.
14##
15## \macro_image
16## \macro_code
17## \macro_output
18##
19## \date March 2020
20## \author Stefan Wunsch (KIT, CERN)
21
22import ROOT
23import json
24import os
25
26# Create a ROOT dataframe for each dataset
27# Note that we load the filenames from the external json file placed in the same folder than this script.
28path = "root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22"
29files = json.load(open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "df106_HiggsToFourLeptons.json")))
30processes = files.keys()
31df = {}
32xsecs = {}
33sumws = {}
34samples = []
35for p in processes:
36 for d in files[p]:
37 # Construct the dataframes
38 folder = d[0] # Folder name
39 sample = d[1] # Sample name
40 xsecs[sample] = d[2] # Cross-section
41 sumws[sample] = d[3] # Sum of weights
42 samples.append(sample)
43 df[sample] = ROOT.RDataFrame("mini", "{}/4lep/{}/{}.4lep.root".format(path, folder, sample))
44
45# Select events for the analysis
46ROOT.gInterpreter.Declare("""
47using VecF_t = const ROOT::RVec<float>&;
48using VecI_t = const ROOT::RVec<int>&;
49bool GoodElectronsAndMuons(VecI_t type, VecF_t pt, VecF_t eta, VecF_t phi, VecF_t e, VecF_t trackd0pv, VecF_t tracksigd0pv, VecF_t z0)
50{
51 for (size_t i = 0; i < type.size(); i++) {
52 ROOT::Math::PtEtaPhiEVector p(pt[i] / 1000.0, eta[i], phi[i], e[i] / 1000.0);
53 if (type[i] == 11) {
54 if (pt[i] < 7000 || abs(eta[i]) > 2.47 || abs(trackd0pv[i] / tracksigd0pv[i]) > 5 || abs(z0[i] * sin(p.Theta())) > 0.5) return false;
55 } else {
56 if (abs(trackd0pv[i] / tracksigd0pv[i]) > 5 || abs(z0[i] * sin(p.Theta())) > 0.5) return false;
57 }
58 }
59 return true;
60}
61""")
62
63for s in samples:
64 # Select electron or muon trigger
65 df[s] = df[s].Filter("trigE || trigM")
66
67 # Select events with exactly four good leptons conserving charge and lepton numbers
68 # Note that all collections are RVecs and good_lep is the mask for the good leptons.
69 # The lepton types are PDG numbers and set to 11 or 13 for an electron or muon
70 # irrespective of the charge.
71 df[s] = df[s].Define("good_lep", "abs(lep_eta) < 2.5 && lep_pt > 5000 && lep_ptcone30 / lep_pt < 0.3 && lep_etcone20 / lep_pt < 0.3")\
72 .Filter("Sum(good_lep) == 4")\
73 .Filter("Sum(lep_charge[good_lep]) == 0")\
74 .Define("goodlep_sumtypes", "Sum(lep_type[good_lep])")\
75 .Filter("goodlep_sumtypes == 44 || goodlep_sumtypes == 52 || goodlep_sumtypes == 48")
76
77 # Apply additional cuts depending on lepton flavour
78 df[s] = df[s].Filter("GoodElectronsAndMuons(lep_type[good_lep], lep_pt[good_lep], lep_eta[good_lep], lep_phi[good_lep], lep_E[good_lep], lep_trackd0pvunbiased[good_lep], lep_tracksigd0pvunbiased[good_lep], lep_z0[good_lep])")
79
80 # Create new columns with the kinematics of good leptons
81 df[s] = df[s].Define("goodlep_pt", "lep_pt[good_lep]")\
82 .Define("goodlep_eta", "lep_eta[good_lep]")\
83 .Define("goodlep_phi", "lep_phi[good_lep]")\
84 .Define("goodlep_E", "lep_E[good_lep]")
85
86 # Select leptons with high transverse momentum
87 df[s] = df[s].Filter("goodlep_pt[0] > 25000 && goodlep_pt[1] > 15000 && goodlep_pt[2] > 10000")
88
89# Apply luminosity, scale factors and MC weights for simulated events
90lumi = 10064.0
91for s in samples:
92 if "data" in s:
93 df[s] = df[s].Define("weight", "1.0")
94 else:
95 df[s] = df[s].Define("weight", "scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * scaleFactor_PILEUP * mcWeight * {} / {} * {}".format(xsecs[s], sumws[s], lumi))
96
97# Compute invariant mass of the four lepton system and make a histogram
98ROOT.gInterpreter.Declare("""
99float ComputeInvariantMass(VecF_t pt, VecF_t eta, VecF_t phi, VecF_t e)
100{
101 ROOT::Math::PtEtaPhiEVector p1(pt[0], eta[0], phi[0], e[0]);
102 ROOT::Math::PtEtaPhiEVector p2(pt[1], eta[1], phi[1], e[1]);
103 ROOT::Math::PtEtaPhiEVector p3(pt[2], eta[2], phi[2], e[2]);
104 ROOT::Math::PtEtaPhiEVector p4(pt[3], eta[3], phi[3], e[3]);
105 return (p1 + p2 + p3 + p4).M() / 1000;
106}
107""")
108
109histos = {}
110for s in samples:
111 df[s] = df[s].Define("m4l", "ComputeInvariantMass(goodlep_pt, goodlep_eta, goodlep_phi, goodlep_E)")
112 histos[s] = df[s].Histo1D(ROOT.RDF.TH1DModel(s, "m4l", 24, 80, 170), "m4l", "weight")
113
114# Run the event loop and merge histograms of the respective processes
115
116# RunGraphs allows to run the event loops of the separate RDataFrame graphs
117# concurrently. This results in an improved usage of the available resources
118# if each separate RDataFrame can not utilize all available resources, e.g.,
119# because not enough data is available.
120ROOT.RDF.RunGraphs([histos[s] for s in samples])
121
122def merge_histos(label):
123 h = None
124 for i, d in enumerate(files[label]):
125 t = histos[d[1]].GetValue()
126 if i == 0: h = t.Clone()
127 else: h.Add(t)
128 h.SetNameTitle(label, label)
129 return h
130
131data = merge_histos("data")
132higgs = merge_histos("higgs")
133zz = merge_histos("zz")
134other = merge_histos("other")
135
136# Apply MC correction for ZZ due to missing gg->ZZ process
137zz.Scale(1.3)
138
139# Create the plot
140
141# Set styles
142ROOT.gROOT.SetStyle("ATLAS")
143
144# Create canvas with pad
145c = ROOT.TCanvas("c", "", 600, 600)
146pad = ROOT.TPad("upper_pad", "", 0, 0, 1, 1)
147pad.SetTickx(False)
148pad.SetTicky(False)
149pad.Draw()
150pad.cd()
151
152# Draw stack with MC contributions
153stack = ROOT.THStack()
154for h, color in zip([other, zz, higgs], [(155, 152, 204), (100, 192, 232), (191, 34, 41)]):
155 h.SetLineWidth(1)
156 h.SetLineColor(1)
157 h.SetFillColor(ROOT.TColor.GetColor(*color))
158 stack.Add(h)
159stack.Draw("HIST")
160stack.GetXaxis().SetLabelSize(0.04)
161stack.GetXaxis().SetTitleSize(0.045)
162stack.GetXaxis().SetTitleOffset(1.3)
163stack.GetXaxis().SetTitle("m_{4l}^{H#rightarrow ZZ} [GeV]")
164stack.GetYaxis().SetTitle("Events")
165stack.GetYaxis().SetLabelSize(0.04)
166stack.GetYaxis().SetTitleSize(0.045)
167stack.SetMaximum(33)
168stack.GetYaxis().ChangeLabel(1, -1, 0)
169
170# Draw data
171data.SetMarkerStyle(20)
172data.SetMarkerSize(1.2)
173data.SetLineWidth(2)
174data.SetLineColor(ROOT.kBlack)
175data.Draw("E SAME")
176
177# Add legend
178legend = ROOT.TLegend(0.60, 0.65, 0.92, 0.92)
179legend.SetTextFont(42)
180legend.SetFillStyle(0)
181legend.SetBorderSize(0)
182legend.SetTextSize(0.04)
183legend.SetTextAlign(32)
184legend.AddEntry(data, "Data" ,"lep")
185legend.AddEntry(higgs, "Higgs", "f")
186legend.AddEntry(zz, "ZZ", "f")
187legend.AddEntry(other, "Other", "f")
188legend.Draw("SAME")
189
190# Add ATLAS label
191text = ROOT.TLatex()
192text.SetNDC()
193text.SetTextFont(72)
194text.SetTextSize(0.045)
195text.DrawLatex(0.21, 0.86, "ATLAS")
196text.SetTextFont(42)
197text.DrawLatex(0.21 + 0.16, 0.86, "Open Data")
198text.SetTextSize(0.04)
199text.DrawLatex(0.21, 0.80, "#sqrt{s} = 13 TeV, 10 fb^{-1}")
200
201# Save the plot
202c.SaveAs("df106_HiggsToFourLeptons.png")
203print("Saved figure to df106_HiggsToFourLeptons.png")
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
void RunGraphs(std::vector< RResultHandle > handles)
Trigger the event loop of multiple RDataFrames concurrently.
A struct which stores the parameters of a TH1D.