Loading [MathJax]/extensions/tex2jax.js
Logo ROOT  
Reference Guide
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
ntpl004_dimuon.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_ntuple
3/// \notebook
4/// Convert CMS open data from a TTree to RNTuple.
5/// This tutorial illustrates data conversion and data processing with RNTuple and RDataFrame. In contrast to the
6/// LHCb open data tutorial, the data model in this tutorial is not tabular but entries have variable lengths vectors
7/// Based on RDataFrame's df102_NanoAODDimuonAnalysis.C
8///
9/// \macro_image
10/// \macro_code
11///
12/// \date April 2019
13/// \author The ROOT Team
14
15// NOTE: The RNTuple classes are experimental at this point.
16// Functionality, interface, and data format is still subject to changes.
17// Do not use for real data!
18
19#include <ROOT/RDataFrame.hxx>
20#include <ROOT/RNTuple.hxx>
21#include <ROOT/RNTupleDS.hxx>
22#include <ROOT/RVec.hxx>
23
24#include <TCanvas.h>
25#include <TH1D.h>
26#include <TLatex.h>
27#include <TStyle.h>
28
29#include <cassert>
30#include <cmath>
31#include <iostream>
32#include <memory>
33#include <string>
34#include <vector>
35#include <utility>
36
37// Import classes from experimental namespace for the time being
38using RNTupleReader = ROOT::Experimental::RNTupleReader;
39using RNTupleWriter = ROOT::Experimental::RNTupleWriter;
40using RNTupleDS = ROOT::Experimental::RNTupleDS;
41
42constexpr char const* kTreeFileName = "http://root.cern.ch/files/NanoAOD_DoubleMuon_CMS2011OpenData.root";
43constexpr char const* kNTupleFileName = "ntpl004_dimuon.root";
44
45
46using ColNames_t = std::vector<std::string>;
47
48// This is a custom action for RDataFrame. It does not support parallelism!
49// This action writes data from an RDataFrame entry into an ntuple. It is templated on the
50// types of the columns to be written and can be used as a generic file format converter.
51template <typename... ColumnTypes_t>
52class RNTupleHelper : public ROOT::Detail::RDF::RActionImpl<RNTupleHelper<ColumnTypes_t...>> {
53public:
54 using Result_t = RNTupleWriter;
55private:
56 using ColumnValues_t = std::tuple<std::shared_ptr<ColumnTypes_t>...>;
57
58 std::string fNTupleName;
59 std::string fRootFile;
60 ColNames_t fColNames;
61 ColumnValues_t fColumnValues;
62 static constexpr const auto fNColumns = std::tuple_size<ColumnValues_t>::value;
63 std::shared_ptr<RNTupleWriter> fNTuple;
64 int fCounter;
65
66 template<std::size_t... S>
67 void InitializeImpl(std::index_sequence<S...>) {
69 // Create the fields and the shared pointers to the connected values
70 std::initializer_list<int> expander{
71 (std::get<S>(fColumnValues) = eventModel->MakeField<ColumnTypes_t>(fColNames[S]), 0)...};
72 fNTuple = std::move(RNTupleWriter::Recreate(std::move(eventModel), fNTupleName, fRootFile));
73 }
74
75 template<std::size_t... S>
76 void ExecImpl(std::index_sequence<S...>, ColumnTypes_t... values) {
77 // For every entry, set the destination of the ntuple's default entry's shared pointers to the given values,
78 // which are provided by RDataFrame
79 std::initializer_list<int> expander{(*std::get<S>(fColumnValues) = values, 0)...};
80 }
81
82public:
83 RNTupleHelper(std::string_view ntupleName, std::string_view rootFile, const ColNames_t& colNames)
84 : fNTupleName(ntupleName), fRootFile(rootFile), fColNames(colNames)
85 {
86 InitializeImpl(std::make_index_sequence<fNColumns>());
87 }
88
89 RNTupleHelper(RNTupleHelper&&) = default;
90 RNTupleHelper(const RNTupleHelper&) = delete;
91 std::shared_ptr<RNTupleWriter> GetResultPtr() const { return fNTuple; }
92
93 void Initialize()
94 {
95 fCounter = 0;
96 }
97
98 void InitTask(TTreeReader *, unsigned int) {}
99
100 /// This is a method executed at every entry
101 void Exec(unsigned int slot, ColumnTypes_t... values)
102 {
103 // Populate the ntuple's fields data locations with the provided values, then write to disk
104 ExecImpl(std::make_index_sequence<fNColumns>(), values...);
105 fNTuple->Fill();
106 if (++fCounter % 100000 == 0)
107 std::cout << "Wrote " << fCounter << " entries" << std::endl;
108 }
109
110 void Finalize()
111 {
112 fNTuple->CommitCluster();
113 }
114
115 std::string GetActionName() { return "RNTuple Writer"; }
116};
117
118
119/// A wrapper for ROOT's InvariantMass function that takes std::vector instead of RVecs
120template <typename T>
121T InvariantMassStdVector(std::vector<T>& pt, std::vector<T>& eta, std::vector<T>& phi, std::vector<T>& mass)
122{
123 assert(pt.size() == 2 && eta.size() == 2 && phi.size() == 2 && mass.size() == 2);
124
125 // We adopt the memory here, no copy
126 ROOT::RVec<float> rvPt(pt);
127 ROOT::RVec<float> rvEta(eta);
128 ROOT::RVec<float> rvPhi(phi);
129 ROOT::RVec<float> rvMass(mass);
130
131 return InvariantMass(rvPt, rvEta, rvPhi, rvMass);
132}
133
134// We use an RDataFrame custom snapshotter to convert between TTree and RNTuple.
135// The snapshotter is templated; we construct the conversion C++ code as a string and hand it over to Cling
136void Convert() {
137 // Use df to list the branch types and names of the input tree
138 ROOT::RDataFrame df("Events", kTreeFileName);
139
140 // Construct the types for the template instantiation and the column names from the dataframe
141 std::string typeList = "<";
142 std::string columnList = "{";
143 auto columnNames = df.GetColumnNames();
144 for (auto name : columnNames) {
145 auto typeName = df.GetColumnType(name);
146 // Skip ULong64_t for the time being, RNTuple support will be added at a later point
147 if (typeName == "ULong64_t") continue;
148 columnList += "\"" + name + "\",";
149 typeList += typeName + ",";
150 }
151 *columnList.rbegin() = '}';
152 *typeList.rbegin() = '>';
153
154 std::string code = "{";
155 // Convert the first 4 million events
156 code += "auto df = std::make_unique<ROOT::RDataFrame>(\"Events\", \"" + std::string(kTreeFileName)
157 + "\")->Range(0, 4000000);";
158 code += "ColNames_t colNames = " + columnList + ";";
159 code += "RNTupleHelper" + typeList + " helper{\"Events\", \"" + std::string(kNTupleFileName) + "\", colNames};";
160 code += "*df.Book" + typeList + "(std::move(helper), colNames);";
161 code += "}";
162
163 gInterpreter->ProcessLine(code.c_str());
164}
165
166
167void ntpl004_dimuon() {
168 Convert();
169
170 // Enable mutli-threading only after the conversion because we use RDF's Range() in it,
171 // which currently does not support multi-threading
173
174 auto df = ROOT::Experimental::MakeNTupleDataFrame("Events", kNTupleFileName);
175
176 // As of this point, the tutorial is identical to df102_NanoAODDimuonAnalysis except the use of
177 // InvariantMassStdVector instead of InvariantMass
178
179 // For simplicity, select only events with exactly two muons and require opposite charge
180 auto df_2mu = df.Filter("nMuon == 2", "Events with exactly two muons");
181 auto df_os = df_2mu.Filter("Muon_charge[0] != Muon_charge[1]", "Muons with opposite charge");
182
183 // Compute invariant mass of the dimuon system
184 auto df_mass = df_os.Define("Dimuon_mass", InvariantMassStdVector<float>, {"Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass"});
185 auto df_size = df_os.Define("eta_size", "Muon_mass.size()");
186
187 // Make histogram of dimuon mass spectrum
188 auto h = df_mass.Histo1D({"Dimuon_mass", "Dimuon_mass", 30000, 0.25, 300}, "Dimuon_mass");
189
190 // Request cut-flow report
191 auto report = df_mass.Report();
192
193 // Produce plot
195 auto c = new TCanvas("c", "", 800, 700);
196 c->SetLogx(); c->SetLogy();
197
198 h->SetTitle("");
199 h->GetXaxis()->SetTitle("m_{#mu#mu} (GeV)"); h->GetXaxis()->SetTitleSize(0.04);
200 h->GetYaxis()->SetTitle("N_{Events}"); h->GetYaxis()->SetTitleSize(0.04);
201 h->DrawCopy();
202
203 TLatex label; label.SetNDC(true);
204 label.DrawLatex(0.175, 0.740, "#eta");
205 label.DrawLatex(0.205, 0.775, "#rho,#omega");
206 label.DrawLatex(0.270, 0.740, "#phi");
207 label.DrawLatex(0.400, 0.800, "J/#psi");
208 label.DrawLatex(0.415, 0.670, "#psi'");
209 label.DrawLatex(0.485, 0.700, "Y(1,2,3S)");
210 label.DrawLatex(0.755, 0.680, "Z");
211 label.SetTextSize(0.040); label.DrawLatex(0.100, 0.920, "#bf{CMS Open Data}");
212 label.SetTextSize(0.030); label.DrawLatex(0.630, 0.920, "#sqrt{s} = 8 TeV, L_{int} = 11.6 fb^{-1}");
213
214 // Print cut-flow report
215 report->Print();
216}
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
char name[80]
Definition: TGX11.cxx:109
#define gInterpreter
Definition: TInterpreter.h:556
R__EXTERN TStyle * gStyle
Definition: TStyle.h:410
static std::unique_ptr< RNTupleModel > Create()
An RNTuple that is used to read data from storage.
Definition: RNTuple.hxx:73
An RNTuple that gets filled with entries (data) and writes them to storage.
Definition: RNTuple.hxx:169
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition: RVec.hxx:275
virtual void SetTextFont(Font_t tfont=62)
Set the text font.
Definition: TAttText.h:45
virtual void SetTextSize(Float_t tsize=1)
Set the text size.
Definition: TAttText.h:46
The Canvas class.
Definition: TCanvas.h:27
To draw Mathematical Formula.
Definition: TLatex.h:18
TLatex * DrawLatex(Double_t x, Double_t y, const char *text)
Make a copy of this object with the new parameters And copy object attributes.
Definition: TLatex.cxx:1926
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1590
virtual void Print(Option_t *option="") const
Dump this text with its attributes.
Definition: TText.cxx:780
virtual void SetNDC(Bool_t isNDC=kTRUE)
Set NDC mode on if isNDC = kTRUE, off otherwise.
Definition: TText.cxx:813
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:43
TPaveText * pt
basic_string_view< char > string_view
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
Definition: API.cxx:331
RDataFrame MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName)
Definition: RNTupleDS.cxx:133
double T(double x)
Definition: ChebyshevPol.h:34
Vector1::Scalar InvariantMass(const Vector1 &v1, const Vector2 &v2)
return the invariant mass of two LorentzVector The only requirement on the LorentzVector is that they...
Definition: VectorUtil.h:215
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:526
RooArgSet S(const RooAbsArg &v1)
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176