Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df105_WBosonAnalysis.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_dataframe
3## \notebook -draw
4## The W boson mass analysis from the ATLAS Open Data release of 2020, with RDataFrame.
5##
6## This tutorial is the analysis of the W boson mass taken from the ATLAS Open Data release in 2020
7## (http://opendata.atlas.cern/release/2020/documentation/). The data was recorded with the ATLAS detector
8## during 2016 at a center-of-mass energy of 13 TeV. W bosons are produced frequently at the LHC and
9## are an important background to studies of Standard Model processes, for example the Higgs boson analyses.
10##
11## The analysis is translated to a RDataFrame workflow processing up to 60 GB of simulated events and data.
12## By default the analysis runs on a preskimmed dataset to reduce the runtime. The full dataset can be used with
13## the --full-dataset argument and you can also run only on a fraction of the original dataset using the argument --lumi-scale.
14##
15## See the [corresponding spec json file](https://github.com/root-project/root/blob/master/tutorials/dataframe/df105_WBosonAnalysis.json).
16##
17## \macro_image
18## \macro_code
19## \macro_output
20##
21## \date March 2020
22## \author Stefan Wunsch (KIT, CERN)
23
24import ROOT
25import sys
26import json
27import argparse
28import os
29
30# Argument parsing
31parser = argparse.ArgumentParser()
32parser.add_argument("--lumi-scale", type=float, default=0.001,
33 help="Run only on a fraction of the total available 10 fb^-1 (only usable together with --full-dataset)")
34parser.add_argument("--full-dataset", action="store_true", default=False,
35 help="Use the full dataset (use --lumi-scale to run only on a fraction of it)")
36parser.add_argument("-b", action="store_true", default=False, help="Use ROOT batch mode")
37parser.add_argument("-t", action="store_true", default=False, help="Use implicit multi threading (for the full dataset only possible with --lumi-scale 1.0)")
38if 'df105_WBosonAnalysis.py' in sys.argv[0]:
39 # Script
40 args = parser.parse_args()
41else:
42 # Notebook
43 args = parser.parse_args(args=[])
44
45if args.b: ROOT.gROOT.SetBatch(True)
46if args.t: ROOT.EnableImplicitMT()
47
48if not args.full_dataset: lumi_scale = 0.001 # The preskimmed dataset contains only 0.01 fb^-1
49else: lumi_scale = args.lumi_scale
50lumi = 10064.0
51print('Run on data corresponding to {:.2f} fb^-1 ...'.format(lumi * lumi_scale / 1000.0))
52
53if args.full_dataset: dataset_path = "root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22"
54else: dataset_path = "root://eospublic.cern.ch//eos/root-eos/reduced_atlas_opendata/w"
55
56# Create a ROOT dataframe for each dataset
57# Note that we load the filenames from the external json file placed in the same folder than this script.
58files = json.load(open(os.path.join(ROOT.gROOT.GetTutorialsDir(), "dataframe/df105_WBosonAnalysis.json")))
59processes = files.keys()
60df = {}
61xsecs = {}
62sumws = {}
63samples = []
64for p in processes:
65 for d in files[p]:
66 # Construct the dataframes
67 folder = d[0] # Folder name
68 sample = d[1] # Sample name
69 xsecs[sample] = d[2] # Cross-section
70 sumws[sample] = d[3] # Sum of weights
71 num_events = d[4] # Number of events
72 samples.append(sample)
73 df[sample] = ROOT.RDataFrame("mini", "{}/1lep/{}/{}.1lep.root".format(dataset_path, folder, sample))
74
75 # Scale down the datasets if requested
76 if args.full_dataset and lumi_scale < 1.0:
77 df[sample] = df[sample].Range(int(num_events * lumi_scale))
78
79# Select events for the analysis
80
81# Just-in-time compile custom helper function performing complex computations
82ROOT.gInterpreter.Declare("""
83bool GoodElectronOrMuon(int type, float pt, float eta, float phi, float e, float trackd0pv, float tracksigd0pv, float z0)
84{
85 ROOT::Math::PtEtaPhiEVector p(pt / 1000.0, eta, phi, e / 1000.0);
86 if (abs(z0 * sin(p.theta())) > 0.5) return false;
87 if (type == 11 && abs(eta) < 2.46 && (abs(eta) < 1.37 || abs(eta) > 1.52)) {
88 if (abs(trackd0pv / tracksigd0pv) > 5) return false;
89 return true;
90 }
91 if (type == 13 && abs(eta) < 2.5) {
92 if (abs(trackd0pv / tracksigd0pv) > 3) return false;
93 return true;
94 }
95 return false;
96}
97""")
98
99for s in samples:
100 # Select events with a muon or electron trigger and with a missing transverse energy larger than 30 GeV
101 df[s] = df[s].Filter("trigE || trigM")\
102 .Filter("met_et > 30000")
103
104 # Find events with exactly one good lepton
105 df[s] = df[s].Define("good_lep", "lep_isTightID && lep_pt > 35000 && lep_ptcone30 / lep_pt < 0.1 && lep_etcone20 / lep_pt < 0.1")\
106 .Filter("ROOT::VecOps::Sum(good_lep) == 1")
107
108 # Apply additional cuts in case the lepton is an electron or muon
109 df[s] = df[s].Define("idx", "ROOT::VecOps::ArgMax(good_lep)")\
110 .Filter("GoodElectronOrMuon(lep_type[idx], lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx], lep_trackd0pvunbiased[idx], lep_tracksigd0pvunbiased[idx], lep_z0[idx])")
111
112# Apply luminosity, scale factors and MC weights for simulated events
113for s in samples:
114 if "data" in s:
115 df[s] = df[s].Define("weight", "1.0")
116 else:
117 df[s] = df[s].Define("weight", "scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * scaleFactor_PILEUP * mcWeight * {} / {} * {}".format(xsecs[s], sumws[s], lumi))
118
119# Compute transverse mass of the W boson using the lepton and the missing transverse energy and make a histogram
120ROOT.gInterpreter.Declare("""
121float ComputeTransverseMass(float met_et, float met_phi, float lep_pt, float lep_eta, float lep_phi, float lep_e)
122{
123 ROOT::Math::PtEtaPhiEVector met(met_et, 0, met_phi, met_et);
124 ROOT::Math::PtEtaPhiEVector lep(lep_pt, lep_eta, lep_phi, lep_e);
125 return (met + lep).Mt() / 1000.0;
126}
127""")
128
129histos = {}
130for s in samples:
131 df[s] = df[s].Define("mt_w", "ComputeTransverseMass(met_et, met_phi, lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx])")
132 histos[s] = df[s].Histo1D(ROOT.RDF.TH1DModel(s, "mt_w", 24, 60, 180), "mt_w", "weight")
133
134# Run the event loop and merge histograms of the respective processes
135
136# RunGraphs allows to run the event loops of the separate RDataFrame graphs
137# concurrently. This results in an improved usage of the available resources
138# if each separate RDataFrame can not utilize all available resources, e.g.,
139# because not enough data is available.
140ROOT.RDF.RunGraphs([histos[s] for s in samples])
141
142def merge_histos(label):
143 h = None
144 for i, d in enumerate(files[label]):
145 t = histos[d[1]].GetValue()
146 if i == 0: h = t.Clone()
147 else: h.Add(t)
148 h.SetNameTitle(label, label)
149 return h
150
151data = merge_histos("data")
152wjets = merge_histos("wjets")
153zjets = merge_histos("zjets")
154ttbar = merge_histos("ttbar")
155diboson = merge_histos("diboson")
156singletop = merge_histos("singletop")
157
158# Create the plot
159
160# Set styles
161ROOT.gROOT.SetStyle("ATLAS")
162
163# Create canvas
164c = ROOT.TCanvas("c", "", 600, 600)
165c.SetTickx(0)
166c.SetTicky(0)
167c.SetLogy()
168
169# Draw stack with MC contributions
170stack = ROOT.THStack()
171for h, color in zip(
172 [singletop, diboson, ttbar, zjets, wjets],
173 [(208, 240, 193), (195, 138, 145), (155, 152, 204), (248, 206, 104), (222, 90, 106)]):
174 h.SetLineWidth(1)
175 h.SetLineColor(1)
176 h.SetFillColor(ROOT.TColor.GetColor(*color))
177 stack.Add(h)
178stack.Draw("HIST")
179stack.GetXaxis().SetLabelSize(0.04)
180stack.GetXaxis().SetTitleSize(0.045)
181stack.GetXaxis().SetTitleOffset(1.3)
182stack.GetXaxis().SetTitle("m_{T}^{W#rightarrow l#nu} [GeV]")
183stack.GetYaxis().SetTitle("Events")
184stack.GetYaxis().SetLabelSize(0.04)
185stack.GetYaxis().SetTitleSize(0.045)
186stack.SetMaximum(1e10 * args.lumi_scale)
187stack.SetMinimum(1)
188
189# Draw data
190data.SetMarkerStyle(20)
191data.SetMarkerSize(1.2)
192data.SetLineWidth(2)
193data.SetLineColor(ROOT.kBlack)
194data.Draw("E SAME")
195
196# Add legend
197legend = ROOT.TLegend(0.60, 0.65, 0.92, 0.92)
198legend.SetTextFont(42)
199legend.SetFillStyle(0)
200legend.SetBorderSize(0)
201legend.SetTextSize(0.04)
202legend.SetTextAlign(32)
203legend.AddEntry(data, "Data" ,"lep")
204legend.AddEntry(wjets, "W+jets", "f")
205legend.AddEntry(zjets, "Z+jets", "f")
206legend.AddEntry(ttbar, "t#bar{t}", "f")
207legend.AddEntry(diboson, "Diboson", "f")
208legend.AddEntry(singletop, "Single top", "f")
209legend.Draw()
210
211# Add ATLAS label
212text = ROOT.TLatex()
213text.SetNDC()
214text.SetTextFont(72)
215text.SetTextSize(0.045)
216text.DrawLatex(0.21, 0.86, "ATLAS")
217text.SetTextFont(42)
218text.DrawLatex(0.21 + 0.16, 0.86, "Open Data")
219text.SetTextSize(0.04)
220text.DrawLatex(0.21, 0.80, "#sqrt{{s}} = 13 TeV, {:.2f} fb^{{-1}}".format(lumi * args.lumi_scale / 1000.0))
221
222# Save the plot
223c.SaveAs("df105_WBosonAnalysis.png")
224print("Saved figure to df105_WBosonAnalysis.png")
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
unsigned int RunGraphs(std::vector< RResultHandle > handles)
Trigger the event loop of multiple RDataFrames concurrently.
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:537
A struct which stores the parameters of a TH1D.
Ta Range(0, 0, 1, 1)