Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df105.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_rcanvas
3## The W boson mass analysis from the ATLAS Open Data release of 2020, with RDataFrame.
4##
5## This tutorial is the analysis of the W boson mass taken from the ATLAS Open Data release in 2020
6## (http://opendata.atlas.cern/release/2020/documentation/). The data was recorded with the ATLAS detector
7## during 2016 at a center-of-mass energy of 13 TeV. W bosons are produced frequently at the LHC and
8## are an important background to studies of Standard Model processes, for example the Higgs boson analyses.
9##
10## The analysis is translated to a RDataFrame workflow processing up to 60 GB of simulated events and data.
11## By default the analysis runs on a preskimmed dataset to reduce the runtime. The full dataset can be used with
12## the --full-dataset argument and you can also run only on a fraction of the original dataset using the argument --lumi-scale.
13##
14## This macro is replica of tutorials/analysis/dataframe/df105_WBosonAnalysis.py, but with usage of ROOT7 graphics
15## Run macro with python3 -i df105.py command to get interactive canvas
16##
17## \macro_image (rcanvas_js)
18## \macro_code
19##
20## \date March 2020
21## \authors Stefan Wunsch (KIT, CERN) Sergey Linev (GSI)
22
23import ROOT
24import sys
25import json
26import argparse
27import os
28from ROOT.Experimental import RCanvas, RText, RAttrText, RAttrFont, RPadPos, TObjectDrawable
29
30# Argument parsing
33 "--lumi-scale",
34 type=float,
35 default=0.001,
36 help="Run only on a fraction of the total available 10 fb^-1 (only usable together with --full-dataset)",
37)
39 "--full-dataset",
40 action="store_true",
41 default=False,
42 help="Use the full dataset (use --lumi-scale to run only on a fraction of it)",
43)
44parser.add_argument("-b", action="store_true", default=False, help="Use ROOT batch mode")
46 "-t",
47 action="store_true",
48 default=False,
49 help="Use implicit multi threading (for the full dataset only possible with --lumi-scale 1.0)",
50)
51if "df105.py" in sys.argv[0]:
52 # Script
53 args = parser.parse_args()
54else:
55 # Notebook
56 args = parser.parse_args(args=[])
57
58if args.b:
60if args.t:
62
64 lumi_scale = 0.001 # The preskimmed dataset contains only 0.01 fb^-1
65else:
66 lumi_scale = args.lumi_scale
67lumi = 10064.0
68print("Run on data corresponding to {:.2f} fb^-1 ...".format(lumi * lumi_scale / 1000.0))
69
71 dataset_path = "root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22"
72else:
73 dataset_path = "root://eospublic.cern.ch//eos/root-eos/reduced_atlas_opendata/w"
74
75# Create a ROOT dataframe for each dataset
76# Note that we load the filenames from the external json file placed in the same folder than this script.
77files = json.load(open(os.path.join(ROOT.gROOT.GetTutorialsDir(), "analysis/dataframe/df105_WBosonAnalysis.json")))
78processes = files.keys()
79df = {}
80xsecs = {}
81sumws = {}
82samples = []
83for p in processes:
84 for d in files[p]:
85 # Construct the dataframes
86 folder = d[0] # Folder name
87 sample = d[1] # Sample name
88 xsecs[sample] = d[2] # Cross-section
89 sumws[sample] = d[3] # Sum of weights
90 num_events = d[4] # Number of events
91 samples.append(sample)
92 df[sample] = ROOT.RDataFrame("mini", "{}/1lep/{}/{}.1lep.root".format(dataset_path, folder, sample))
93
94 # Scale down the datasets if requested
95 if args.full_dataset and lumi_scale < 1.0:
96 df[sample] = df[sample].Range(int(num_events * lumi_scale))
97
98# Select events for the analysis
99
100# Just-in-time compile custom helper function performing complex computations
102bool GoodElectronOrMuon(int type, float pt, float eta, float phi, float e, float trackd0pv, float tracksigd0pv, float z0)
103{
104 ROOT::Math::PtEtaPhiEVector p(pt / 1000.0, eta, phi, e / 1000.0);
105 if (abs(z0 * sin(p.theta())) > 0.5) return false;
106 if (type == 11 && abs(eta) < 2.46 && (abs(eta) < 1.37 || abs(eta) > 1.52)) {
107 if (abs(trackd0pv / tracksigd0pv) > 5) return false;
108 return true;
109 }
110 if (type == 13 && abs(eta) < 2.5) {
111 if (abs(trackd0pv / tracksigd0pv) > 3) return false;
112 return true;
113 }
114 return false;
115}
116""")
117
118for s in samples:
119 # Select events with a muon or electron trigger and with a missing transverse energy larger than 30 GeV
120 df[s] = df[s].Filter("trigE || trigM").Filter("met_et > 30000")
121
122 # Find events with exactly one good lepton
123 df[s] = (
124 df[s]
125 .Define(
126 "good_lep", "lep_isTightID && lep_pt > 35000 && lep_ptcone30 / lep_pt < 0.1 && lep_etcone20 / lep_pt < 0.1"
127 )
128 .Filter("ROOT::VecOps::Sum(good_lep) == 1")
129 )
130
131 # Apply additional cuts in case the lepton is an electron or muon
132 df[s] = (
133 df[s]
134 .Define("idx", "ROOT::VecOps::ArgMax(good_lep)")
135 .Filter(
136 "GoodElectronOrMuon(lep_type[idx], lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx], lep_trackd0pvunbiased[idx], lep_tracksigd0pvunbiased[idx], lep_z0[idx])"
137 )
138 )
139
140# Apply luminosity, scale factors and MC weights for simulated events
141for s in samples:
142 if "data" in s:
143 df[s] = df[s].Define("weight", "1.0")
144 else:
145 df[s] = df[s].Define(
146 "weight",
147 "scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * scaleFactor_PILEUP * mcWeight * {} / {} * {}".format(
148 xsecs[s], sumws[s], lumi
149 ),
150 )
151
152# Compute transverse mass of the W boson using the lepton and the missing transverse energy and make a histogram
154float ComputeTransverseMass(float met_et, float met_phi, float lep_pt, float lep_eta, float lep_phi, float lep_e)
155{
156 ROOT::Math::PtEtaPhiEVector met(met_et, 0, met_phi, met_et);
157 ROOT::Math::PtEtaPhiEVector lep(lep_pt, lep_eta, lep_phi, lep_e);
158 return (met + lep).Mt() / 1000.0;
159}
160""")
161
162histos = {}
163for s in samples:
164 df[s] = df[s].Define(
165 "mt_w", "ComputeTransverseMass(met_et, met_phi, lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx])"
166 )
167 histos[s] = df[s].Histo1D(ROOT.RDF.TH1DModel(s, "mt_w", 24, 60, 180), "mt_w", "weight")
168
169# Run the event loop and merge histograms of the respective processes
170
171# RunGraphs allows to run the event loops of the separate RDataFrame graphs
172# concurrently. This results in an improved usage of the available resources
173# if each separate RDataFrame can not utilize all available resources, e.g.,
174# because not enough data is available.
175ROOT.RDF.RunGraphs([histos[s] for s in samples])
176
177
178def merge_histos(label):
179 h = None
180 for i, d in enumerate(files[label]):
181 t = histos[d[1]].GetValue()
182 if i == 0:
183 h = t.Clone()
184 else:
185 h.Add(t)
186 h.SetNameTitle(label, label)
187 return h
188
189
190data = merge_histos("data")
191wjets = merge_histos("wjets")
192zjets = merge_histos("zjets")
193ttbar = merge_histos("ttbar")
194diboson = merge_histos("diboson")
195singletop = merge_histos("singletop")
196
197# Create the plot
198
199# Set styles
200ROOT.gROOT.SetStyle("ATLAS")
201
202# Create canvas and configure frame with axis attributes
203c = RCanvas.Create("df105_WBosonAnalysis")
204frame = c.AddFrame()
209# c.SetTickx(0)
210# c.SetTicky(0)
211
212frame.x.min = 60
213frame.x.max = 180
214frame.x.title.value = "m_{T}^{W#rightarrow l#nu} [GeV]"
215frame.x.title.size = 0.045
218
219frame.y.min = 1
221frame.y.log = 10
222frame.y.title.value = "Events"
223frame.y.title.size = 0.045
225
226# instruct RFrame to draw axes
227frame.drawAxes = True
228
229# Draw stack with MC contributions
230stack = ROOT.THStack()
231for h, color in zip(
232 [singletop, diboson, ttbar, zjets, wjets],
233 [(0.82, 0.94, 0.76), (0.76, 0.54, 0.57), (0.61, 0.6, 0.8), (0.97, 0.81, 0.41), (0.87, 0.35, 0.42)],
234):
236 h.SetLineColor("black")
238 # From Python 3.11 onward, implicit conversion of the tuple works too:
239 # h.SetFillColor(color)
240 stack.Add(h)
241c.Add[TObjectDrawable]().Set(stack, "HIST SAME")
242
243# Draw data
247data.SetLineColor("black")
248c.Add[TObjectDrawable]().Set(data, "E SAME")
249
250# Add TLegend while histograms packed in the THStack
251legend = ROOT.TLegend(0.60, 0.65, 0.92, 0.92)
257legend.AddEntry(data, "Data", "lep")
258legend.AddEntry(wjets, "W+jets", "f")
259legend.AddEntry(zjets, "Z+jets", "f")
260legend.AddEntry(ttbar, "t#bar{t}", "f")
261legend.AddEntry(diboson, "Diboson", "f")
262legend.AddEntry(singletop, "Single top", "f")
263c.Add[TObjectDrawable]().Set(legend)
264
265# Add ATLAS label
266lbl1 = c.Add[RText](RPadPos(0.05, 0.88), "ATLAS")
267lbl1.onFrame = True
269lbl1.text.size = 0.04
271lbl2 = c.Add[RText](RPadPos(0.05 + 0.20, 0.88), "Open Data")
272lbl2.onFrame = True
274lbl2.text.size = 0.04
276lbl3 = c.Add[RText](
277 RPadPos(0.05, 0.82), "#sqrt{{s}} = 13 TeV, {:.2f} fb^{{-1}}".format(lumi * args.lumi_scale / 1000.0)
278)
279lbl3.onFrame = True
281lbl3.text.size = 0.03
283
284# show canvas finally
285c.SetSize(600, 600)
286c.Show()
287
288# Save the plot
289if c.SaveAs("df105.png"):
290 print("Saved figure to df105.png")
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
A struct which stores some basic parameters of a TH1D.
Ta Range(0, 0, 1, 1)