Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleReader.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleReader.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-20
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleReader
17#define ROOT7_RNTupleReader
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
20#include <ROOT/RError.hxx>
23#include <ROOT/RNTupleModel.hxx>
25#include <ROOT/RNTupleUtil.hxx>
26#include <ROOT/RNTupleView.hxx>
27#include <ROOT/RPageStorage.hxx>
28#include <ROOT/RSpan.hxx>
29
30#include <iostream>
31#include <iterator>
32#include <memory>
33#include <string>
34#include <string_view>
35
36namespace ROOT {
37namespace Experimental {
38
39class REntry;
40class RNTuple;
41
/// Listing of the different options that can be printed by RNTupleReader::PrintInfo()
enum class ENTupleInfo {
   kSummary,        ///< The ntuple name, description, number of entries
   kStorageDetails, ///< Size on storage, page sizes, compression factor, etc.
   kMetrics,        ///< Internal performance counters, requires that EnableMetrics() was called
};
48
49// clang-format off
50/**
51\class ROOT::Experimental::RNTupleReader
52\ingroup NTuple
53\brief An RNTuple that is used to read data from storage
54
55An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
56or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
57only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
58Individual fields can be read as well by instantiating a tree view.
59
60~~~ {.cpp}
61#include <ROOT/RNTupleReader.hxx>
62using ROOT::Experimental::RNTupleReader;
63
64#include <iostream>
65
66auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
67std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
68~~~
69*/
70// clang-format on
72private:
73 /// Set as the page source's scheduler for parallel page decompression if IMT is on
74 /// Needs to be destructed after the page source is destructed (and thus be declared before)
75 std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks;
76
77 std::unique_ptr<Internal::RPageSource> fSource;
78 /// Needs to be destructed before fSource
79 std::unique_ptr<RNTupleModel> fModel;
80 /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields
81 /// from the full model even if the analysis code uses only a subset of fields. The display reader
82 /// is a clone of the original reader.
83 std::unique_ptr<RNTupleReader> fDisplayReader;
84 /// The ntuple descriptor in the page source is protected by a read-write lock. We don't expose that to the
85 /// users of RNTupleReader::GetDescriptor(). Instead, if descriptor information is needed, we clone the
86 /// descriptor. Using the descriptor's generation number, we know if the cached descriptor is stale.
87 /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes,
88 /// not on a hot code path.
89 std::unique_ptr<RNTupleDescriptor> fCachedDescriptor;
91
92 RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source,
93 const RNTupleReadOptions &options);
94 /// The model is generated from the ntuple metadata on storage.
95 explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source, const RNTupleReadOptions &options);
96
97 void ConnectModel(RNTupleModel &model);
99 void InitPageSource(bool enableMetrics);
100
101 DescriptorId_t RetrieveFieldId(std::string_view fieldName) const;
102
103public:
104 // Browse through the entries
105 class RIterator {
106 private:
108
109 public:
111 using iterator_category = std::forward_iterator_tag;
116
117 RIterator() = default;
119 ~RIterator() = default;
120
121 iterator operator++(int) /* postfix */
122 {
123 auto r = *this;
124 fIndex++;
125 return r;
126 }
127 iterator &operator++() /* prefix */
128 {
129 ++fIndex;
130 return *this;
131 }
133 pointer operator->() { return &fIndex; }
134 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
135 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
136 };
137
/// Used to specify the underlying RNTuples in OpenFriends()
struct ROpenSpec {
   std::string fNTupleName; ///< Set from the first constructor argument
   std::string fStorage;    ///< Set from the second constructor argument

   ROpenSpec() = default;
   /// Copies the character data viewed by `n` and `s` into the owned strings.
   ROpenSpec(std::string_view n, std::string_view s) : fNTupleName(n), fStorage(s) {}
};
147
148 /// Open an RNTuple for reading.
149 ///
150 /// Throws an RException if there is no RNTuple with the given name.
151 ///
152 /// **Example: open an RNTuple and print the number of entries**
153 /// ~~~ {.cpp}
154 /// #include <ROOT/RNTupleReader.hxx>
155 /// using ROOT::Experimental::RNTupleReader;
156 ///
157 /// #include <iostream>
158 ///
159 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
160 /// std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
161 /// ~~~
162 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage,
163 const RNTupleReadOptions &options = RNTupleReadOptions());
164 static std::unique_ptr<RNTupleReader>
165 Open(const RNTuple &ntuple, const RNTupleReadOptions &options = RNTupleReadOptions());
166 /// The caller imposes a model, which must be compatible with the model found in the data on storage.
167 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, std::string_view ntupleName,
168 std::string_view storage,
169 const RNTupleReadOptions &options = RNTupleReadOptions());
170 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, const RNTuple &ntuple,
171 const RNTupleReadOptions &options = RNTupleReadOptions());
172 /// Open RNTuples as one virtual, horizontally combined ntuple. The underlying RNTuples must
173 /// have an identical number of entries. Fields in the combined RNTuple are named with the ntuple name
174 /// as a prefix, e.g. myNTuple1.px and myNTuple2.pt (see tutorial ntpl006_friends)
175 static std::unique_ptr<RNTupleReader>
176 OpenFriends(std::span<ROpenSpec> ntuples, const RNTupleReadOptions &options = RNTupleReadOptions());
177 std::unique_ptr<RNTupleReader> Clone()
178 {
179 auto options = RNTupleReadOptions{};
181 return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone(), options));
182 }
184
185 NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
186 const RNTupleModel &GetModel();
187
188 /// Returns a cached copy of the page source descriptor. The returned reference remains valid until the next call
189 /// to LoadEntry or to any of the views returned from the reader.
191
192 /// Prints a detailed summary of the ntuple, including a list of fields.
193 ///
194 /// **Example: print summary information to stdout**
195 /// ~~~ {.cpp}
196 /// #include <ROOT/RNTupleReader.hxx>
197 /// using ROOT::Experimental::ENTupleInfo;
198 /// using ROOT::Experimental::RNTupleReader;
199 ///
200 /// #include <iostream>
201 ///
202 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
203 /// ntuple->PrintInfo();
204 /// // or, equivalently:
205 /// ntuple->PrintInfo(ENTupleInfo::kSummary, std::cout);
206 /// ~~~
207 /// **Example: print detailed column storage data to stderr**
208 /// ~~~ {.cpp}
209 /// #include <ROOT/RNTupleReader.hxx>
210 /// using ROOT::Experimental::ENTupleInfo;
211 /// using ROOT::Experimental::RNTupleReader;
212 ///
213 /// #include <iostream>
214 ///
215 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
216 /// ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cerr);
217 /// ~~~
218 ///
219 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
220 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout) const;
221
222 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
223 /// prints the output in JSON format.
224 /// Uses the visitor pattern to traverse through each field of the given entry.
225 void Show(NTupleSize_t index, std::ostream &output = std::cout);
226
227 /// Analogous to Fill(), fills the default entry of the model. The function returns void, so the end of the
228 /// ntuple is not signalled through a return value. On I/O errors, raises an exception.
230 {
231 // TODO(jblomer): can be templated depending on the factory method / constructor
232 if (R__unlikely(!fModel)) {
233 fModel = fSource->GetSharedDescriptorGuard()->CreateModel();
235 }
236 LoadEntry(index, fModel->GetDefaultEntry());
237 }
238 /// Fills a user provided entry after checking that the entry has been instantiated from the ntuple model
239 void LoadEntry(NTupleSize_t index, REntry &entry) { entry.Read(index); }
240
241 /// Returns an iterator over the entry indices of the RNTuple.
242 ///
243 /// **Example: iterate over all entries and print each entry in JSON format**
244 /// ~~~ {.cpp}
245 /// #include <ROOT/RNTupleReader.hxx>
246 /// using ROOT::Experimental::ENTupleShowFormat;
247 /// using ROOT::Experimental::RNTupleReader;
248 ///
249 /// #include <iostream>
250 ///
251 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
252 /// for (auto i : ntuple->GetEntryRange()) {
253 /// ntuple->Show(i);
254 /// }
255 /// ~~~
257
258 /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
259 /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
260 /// field of a collection itself, like GetView<NTupleSize_t>("particle").
261 ///
262 /// Raises an exception if there is no field with the given name.
263 ///
264 /// **Example: iterate over a field named "pt" of type `float`**
265 /// ~~~ {.cpp}
266 /// #include <ROOT/RNTupleReader.hxx>
267 /// using ROOT::Experimental::RNTupleReader;
268 ///
269 /// #include <iostream>
270 ///
271 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
272 /// auto pt = ntuple->GetView<float>("pt");
273 ///
274 /// for (auto i : ntuple->GetEntryRange()) {
275 /// std::cout << i << ": " << pt(i) << "\n";
276 /// }
277 /// ~~~
278 template <typename T>
279 RNTupleView<T, false> GetView(std::string_view fieldName)
280 {
281 return GetView<T>(RetrieveFieldId(fieldName));
282 }
283
284 template <typename T>
285 RNTupleView<T, true> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr)
286 {
287 return GetView<T>(RetrieveFieldId(fieldName), objPtr);
288 }
289
290 template <typename T>
292 {
293 return RNTupleView<T, false>(fieldId, fSource.get());
294 }
295
296 template <typename T>
297 RNTupleView<T, true> GetView(DescriptorId_t fieldId, std::shared_ptr<T> objPtr)
298 {
299 return RNTupleView<T, true>(fieldId, fSource.get(), objPtr);
300 }
301
302 /// Raises an exception if:
303 /// * there is no field with the given name or,
304 /// * the field is not a collection
305 RNTupleCollectionView GetCollectionView(std::string_view fieldName)
306 {
307 auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName);
308 if (fieldId == kInvalidDescriptorId) {
309 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" +
310 fSource->GetSharedDescriptorGuard()->GetName() + "'"));
311 }
312 return GetCollectionView(fieldId);
313 }
314
316 {
317 return RNTupleCollectionView(fieldId, fSource.get());
318 }
319
320 RIterator begin() { return RIterator(0); }
322
323 /// Enable performance measurements (decompression time, bytes read from storage, etc.)
324 ///
325 /// **Example: inspect the reader metrics after loading every entry**
326 /// ~~~ {.cpp}
327 /// #include <ROOT/RNTupleReader.hxx>
328 /// using ROOT::Experimental::ENTupleInfo;
329 /// using ROOT::Experimental::RNTupleReader;
330 ///
331 /// #include <iostream>
332 ///
333 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
334 /// // metrics must be turned on beforehand
335 /// ntuple->EnableMetrics();
336 ///
337 /// for (auto i : ntuple->GetEntryRange()) {
338 /// ntuple->LoadEntry(i);
339 /// }
340 /// ntuple->PrintInfo(ENTupleInfo::kMetrics);
341 /// ~~~
343 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
344}; // class RNTupleReader
345
346} // namespace Experimental
347} // namespace ROOT
348
349#endif // ROOT7_RNTupleReader
#define R__unlikely(expr)
Definition RConfig.hxx:586
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A collection of Counter objects with a name, a unit, and a description.
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:50
void Read(NTupleSize_t index)
Definition REntry.hxx:103
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
A view for a collection, that can itself generate new ntuple views for its nested fields.
The on-storage meta-data of an ntuple.
Used to loop over indexes (entries or collections) between start and end.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
bool operator!=(const iterator &rh) const
bool operator==(const iterator &rh) const
An RNTuple that is used to read data from storage.
DescriptorId_t RetrieveFieldId(std::string_view fieldName) const
void Show(NTupleSize_t index, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
std::unique_ptr< RNTupleReader > Clone()
Detail::RNTupleMetrics fMetrics
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show() and Scan().
void EnableMetrics()
Enable performance measurements (decompression time, bytes read from storage, etc....
const Detail::RNTupleMetrics & GetMetrics() const
RNTupleView< T, false > GetView(std::string_view fieldName)
Provides access to an individual field that can contain either a scalar value or a collection,...
const RNTupleDescriptor & GetDescriptor()
Returns a cached copy of the page source descriptor.
std::unique_ptr< RNTupleDescriptor > fCachedDescriptor
The ntuple descriptor in the page source is protected by a read-write lock.
RNTupleCollectionView GetCollectionView(std::string_view fieldName)
Raises an exception if:
std::unique_ptr< Internal::RPageSource > fSource
RNTupleCollectionView GetCollectionView(DescriptorId_t fieldId)
static std::unique_ptr< RNTupleReader > Open(std::string_view ntupleName, std::string_view storage, const RNTupleReadOptions &options=RNTupleReadOptions())
Open an RNTuple for reading.
RNTupleGlobalRange GetEntryRange()
Returns an iterator over the entry indices of the RNTuple.
std::unique_ptr< Internal::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if IMT is on Needs to be destructe...
RNTupleView< T, true > GetView(DescriptorId_t fieldId, std::shared_ptr< T > objPtr)
void InitPageSource(bool enableMetrics)
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout) const
Prints a detailed summary of the ntuple, including a list of fields.
RNTupleView< T, true > GetView(std::string_view fieldName, std::shared_ptr< T > objPtr)
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
void LoadEntry(NTupleSize_t index, REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the ntuple model...
RNTupleView< T, false > GetView(DescriptorId_t fieldId)
static std::unique_ptr< RNTupleReader > OpenFriends(std::span< ROpenSpec > ntuples, const RNTupleReadOptions &options=RNTupleReadOptions())
Open RNTuples as one virtual, horizontally combined ntuple.
void ConnectModel(RNTupleModel &model)
An RNTupleView provides read-only access to a single field of the ntuple.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:61
const Int_t n
Definition legend1.C:16
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static const char * what
Definition stlLoader.cc:5
Used to specify the underlying RNTuples in OpenFriends()
ROpenSpec(std::string_view n, std::string_view s)
static void output()