Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleReader.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleReader.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-20
5
6/*************************************************************************
7 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef ROOT_RNTupleReader
15#define ROOT_RNTupleReader
16
17#include <ROOT/RConfig.hxx> // for R__unlikely
18#include <ROOT/REntry.hxx>
19#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleModel.hxx>
24#include <ROOT/RNTupleUtil.hxx>
25#include <ROOT/RNTupleView.hxx>
26#include <ROOT/RPageStorage.hxx>
27#include <ROOT/RSpan.hxx>
28
29#include <iostream>
30#include <iterator>
31#include <memory>
32#include <string>
33#include <string_view>
34
35namespace ROOT {
36class RNTuple;
37
38/// Listing of the different options that can be printed by RNTupleReader::PrintInfo()
39enum class ENTupleInfo {
40 kSummary, // The RNTuple name, description, number of entries
41 kStorageDetails, // size on storage, page sizes, compression factor, etc.
42 kMetrics, // internal performance counters, requires that EnableMetrics() was called
43};
44
45// clang-format off
46/**
47\class ROOT::RNTupleReader
48\ingroup NTuple
49\brief Reads RNTuple data from storage
50
51The RNTupleReader provides access to data stored in the RNTuple binary format as C++ objects, using an RNTupleModel.
52It infers this model from the RNTuple's on-disk metadata, or uses a model imposed by the user.
53The latter case allows users to read into a specialized RNTuple model that covers
54only a subset of the fields in the RNTuple. The RNTuple model is used when reading complete entries through LoadEntry().
55Individual fields can be read as well by instantiating an RNTupleView through GetView().
56
57~~~ {.cpp}
58#include <ROOT/RNTupleReader.hxx>
59#include <iostream>
60
61auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
62std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n";
63~~~
64*/
65// clang-format on
67private:
68 /// Set as the page source's scheduler for parallel page decompression if implicit multi-threading (IMT) is on.
69 /// Needs to be destructed after the page source is destructed (and thus be declared before)
70 std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks;
71
72 std::unique_ptr<Internal::RPageSource> fSource;
73 /// Needs to be destructed before fSource
74 std::unique_ptr<ROOT::RNTupleModel> fModel;
75 /// We use a dedicated on-demand reader for Show(). Printing data uses all the fields
76 /// from the full model even if the analysis code uses only a subset of fields. The display reader
77 /// is a clone of the original reader.
78 std::unique_ptr<RNTupleReader> fDisplayReader;
79 /// The RNTuple descriptor in the page source is protected by a read-write lock. We don't expose that to the
80 /// users of RNTupleReader::GetDescriptor(). Instead, if descriptor information is needed, we clone the
81 /// descriptor. Using the descriptor's generation number, we know if the cached descriptor is stale.
82 /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes,
83 /// not on a hot code path.
84 std::optional<ROOT::RNTupleDescriptor> fCachedDescriptor;
86 /// If not nullopt, these will be used when creating the model
87 std::optional<ROOT::RNTupleDescriptor::RCreateModelOptions> fCreateModelOptions;
88
89 RNTupleReader(std::unique_ptr<ROOT::RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source,
90 const ROOT::RNTupleReadOptions &options);
91 /// The model is generated from the RNTuple metadata on storage.
92 explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source, const ROOT::RNTupleReadOptions &options);
93
97
98 ROOT::DescriptorId_t RetrieveFieldId(std::string_view fieldName) const;
99
100public:
101 // Browse through the entries
102 class RIterator {
103 private:
105
106 public:
108 using iterator_category = std::forward_iterator_tag;
113
114 RIterator() = default;
116 ~RIterator() = default;
117
118 iterator operator++(int) /* postfix */
119 {
120 auto r = *this;
121 fIndex++;
122 return r;
123 }
124 iterator &operator++() /* prefix */
125 {
126 ++fIndex;
127 return *this;
128 }
130 pointer operator->() { return &fIndex; }
131 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
132 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
133 };
134
135 /// Open an RNTuple for reading.
136 ///
137 /// Throws an RException if there is no RNTuple with the given name.
138 ///
139 /// **Example: open an RNTuple and print the number of entries**
140 /// ~~~ {.cpp}
141 /// #include <ROOT/RNTupleReader.hxx>
142 /// #include <iostream>
143 ///
144 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
145 /// std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n";
146 /// ~~~
147 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage,
149 static std::unique_ptr<RNTupleReader>
151
152 /// The caller imposes a model, which must be compatible with the model found in the data on storage.
153 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntupleName,
154 std::string_view storage,
156 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, const RNTuple &ntuple,
158
159 /// The caller imposes the way the model is reconstructed
160 static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts,
161 std::string_view ntupleName, std::string_view storage,
163 static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts,
164 const RNTuple &ntuple,
166 std::unique_ptr<RNTupleReader> Clone()
167 {
168 auto options = ROOT::RNTupleReadOptions{};
170 return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone(), options));
171 }
173
174 ROOT::NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
176 std::unique_ptr<ROOT::REntry> CreateEntry();
177
178 /// Returns a cached copy of the page source descriptor. The returned reference remains valid until the next call
179 /// to LoadEntry() or to any of the views returned from the reader.
181
182 /// Prints a detailed summary of the RNTuple, including a list of fields.
183 ///
184 /// **Example: print summary information to stdout**
185 /// ~~~ {.cpp}
186 /// #include <ROOT/RNTupleReader.hxx>
187 /// #include <iostream>
188 ///
189 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
190 /// reader->PrintInfo();
191 /// // or, equivalently:
192 /// reader->PrintInfo(ROOT::ENTupleInfo::kSummary, std::cout);
193 /// ~~~
194 /// **Example: print detailed column storage data to stderr**
195 /// ~~~ {.cpp}
196 /// #include <ROOT/RNTupleReader.hxx>
197 /// #include <iostream>
198 ///
199 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
200 /// reader->PrintInfo(ROOT::ENTupleInfo::kStorageDetails, std::cerr);
201 /// ~~~
202 ///
203 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
204 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout) const;
205
206 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
207 /// prints the output in JSON format.
208 /// Uses the visitor pattern to traverse through each field of the given entry.
209 void Show(ROOT::NTupleSize_t index, std::ostream &output = std::cout);
210
211 /// Fills the default entry of the model.
212 /// Raises an exception when `index` is not a valid entry index, i.e. when it is greater than or equal to the
212 /// number of entries present in the RNTuple (entry indices start at 0)
214 {
215 // TODO(jblomer): can be templated depending on the factory method / constructor
216 if (R__unlikely(!fModel)) {
217 fModel = fSource->GetSharedDescriptorGuard()->CreateModel(
220 }
221 LoadEntry(index, fModel->GetDefaultEntry());
222 }
223 /// Fills a user provided entry after checking that the entry has been instantiated from the RNTuple model
225 {
226 if (R__unlikely(entry.GetModelId() != fModel->GetModelId()))
227 throw RException(R__FAIL("mismatch between entry and model"));
228
229 entry.Read(index);
230 }
231
232 /// Returns an iterator over the entry indices of the RNTuple.
233 ///
234 /// **Example: iterate over all entries and print each entry in JSON format**
235 /// ~~~ {.cpp}
236 /// #include <ROOT/RNTupleReader.hxx>
237 /// #include <iostream>
238 ///
239 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
240 /// for (auto i : reader->GetEntryRange()) {
241 /// reader->Show(i);
242 /// }
243 /// ~~~
245
246 /// Provides access to an individual (sub)field,
247 /// e.g. `GetView<Particle>("particle")`, `GetView<double>("particle.pt")` or
248 /// `GetView<std::vector<Particle>>("particles")`. It is possible to directly get the size of a collection (without
249 /// reading the collection itself) using RNTupleCardinality:
250 /// `GetView<ROOT::RNTupleCardinality<std::uint64_t>>("particles")`.
251 ///
252 /// Raises an exception if there is no field with the given name.
253 ///
254 /// **Example: iterate over a field named "pt" of type `float`**
255 /// ~~~ {.cpp}
256 /// #include <ROOT/RNTupleReader.hxx>
257 /// #include <iostream>
258 ///
259 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
260 /// auto pt = reader->GetView<float>("pt");
261 ///
262 /// for (auto i : reader->GetEntryRange()) {
263 /// std::cout << i << ": " << pt(i) << "\n";
264 /// }
265 /// ~~~
266 ///
267 /// **Note**: if `T = void`, type checks are disabled. This is not really useful for this overload because
268 /// RNTupleView<void> does not give access to the pointer. If required, it is possible to provide an `objPtr` of a
269 /// dynamic type, for example via GetView(std::string_view, void *, std::string_view).
270 template <typename T>
272 {
274 }
275
276 /// Provides access to an individual (sub)field, reading its values into `objPtr`.
277 ///
278 /// Raises an exception if there is no field with the given name.
279 ///
280 /// **Example: iterate over a field named "pt" of type `float`**
281 /// ~~~ {.cpp}
282 /// #include <ROOT/RNTupleReader.hxx>
283 /// #include <iostream>
284 ///
285 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
286 /// auto pt = std::make_shared<float>();
287 /// auto ptView = reader->GetView("pt", pt);
288 ///
289 /// for (auto i : reader->GetEntryRange()) {
290 /// ptView(i);
291 /// std::cout << i << ": " << *pt << "\n";
292 /// }
293 /// ~~~
294 ///
295 /// **Note**: if `T = void`, type checks are disabled. It is the caller's responsibility to match the field and
296 /// object types. It is strongly recommended to use an overload that allows passing the `typeName`, such as
297 /// GetView(std::string_view, void *, std::string_view). This allows type checks with the on-disk metadata and
298 /// enables automatic schema evolution and conversion rules.
299 template <typename T>
300 ROOT::RNTupleView<T> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr)
301 {
303 }
304
305 /// Provides access to an individual (sub)field, reading its values into `rawPtr`.
306 ///
307 /// \sa GetView(std::string_view, std::shared_ptr<T>)
308 template <typename T>
310 {
312 }
313
314 /// Provides access to an individual (sub)field, reading its values into `rawPtr` as the type provided by `typeName`.
315 ///
316 /// \sa GetView(std::string_view, std::shared_ptr<T>)
317 ROOT::RNTupleView<void> GetView(std::string_view fieldName, void *rawPtr, std::string_view typeName)
318 {
319 return GetView(RetrieveFieldId(fieldName), rawPtr, typeName);
320 }
321
322 /// Provides access to an individual (sub)field, reading its values into `rawPtr` as the type provided by `ti`.
323 ///
324 /// \sa GetView(std::string_view, std::shared_ptr<T>)
329
330 /// Provides access to an individual (sub)field from its on-disk ID.
331 ///
332 /// \sa GetView(std::string_view)
333 template <typename T>
340
341 /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `objPtr`.
342 ///
343 /// \sa GetView(std::string_view, std::shared_ptr<T>)
344 template <typename T>
351
352 /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `rawPtr`.
353 ///
354 /// \sa GetView(std::string_view, std::shared_ptr<T>)
355 template <typename T>
362
363 /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `rawPtr` as the type
364 /// provided by `typeName`.
365 ///
366 /// \sa GetView(std::string_view, std::shared_ptr<T>)
368 {
371 return RNTupleView<void>(std::move(field), range, rawPtr);
372 }
373
374 /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `rawPtr` as the type
375 /// provided by `ti`.
376 ///
377 /// \sa GetView(std::string_view, std::shared_ptr<T>)
382
383 /// Provides direct access to the I/O buffers of a **mappable** (sub)field.
384 ///
385 /// Raises an exception if there is no field with the given name.
386 /// Attempting to access the values of a direct-access view for non-mappable fields will yield compilation errors.
387 ///
388 /// \sa GetView(std::string_view)
389 template <typename T>
394
395 /// Provides direct access to the I/O buffers of a **mappable** (sub)field from its on-disk ID.
396 ///
397 /// \sa GetDirectAccessView(std::string_view)
398 template <typename T>
405
406 /// Provides access to a collection field, that can itself generate new RNTupleViews for its nested fields.
407 ///
408 /// Raises an exception if:
409 /// * there is no field with the given name or,
410 /// * the field is not a collection
411 ///
412 /// \sa GetView(std::string_view)
414 {
415 auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName);
417 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" +
418 fSource->GetSharedDescriptorGuard()->GetName() + "'"));
419 }
421 }
422
423 /// Provides access to a collection field from its on-disk ID, that can itself generate new RNTupleViews for its
424 /// nested fields.
425 ///
426 /// \sa GetCollectionView(std::string_view)
431
432 RIterator begin() { return RIterator(0); }
434
435 /// Enable performance measurements (decompression time, bytes read from storage, etc.)
436 ///
437 /// **Example: inspect the reader metrics after loading every entry**
438 /// ~~~ {.cpp}
439 /// #include <ROOT/RNTupleReader.hxx>
440 /// #include <iostream>
441 ///
442 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
443 /// // metrics must be turned on beforehand
444 /// reader->EnableMetrics();
445 ///
446 /// for (auto i : reader->GetEntryRange()) {
447 /// reader->LoadEntry(i);
448 /// }
449 /// reader->PrintInfo(ROOT::ENTupleInfo::kMetrics);
450 /// ~~~
453}; // class RNTupleReader
454
455} // namespace ROOT
456
457#endif // ROOT_RNTupleReader
#define R__unlikely(expr)
Definition RConfig.hxx:594
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A collection of Counter objects with a name, a unit, and a description.
The REntry is a collection of values in an RNTuple corresponding to a complete row in the data set.
Definition REntry.hxx:54
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
A view for a collection, that can itself generate new ntuple views for its nested fields.
static RNTupleCollectionView Create(ROOT::DescriptorId_t fieldId, ROOT::Internal::RPageSource *source)
The on-storage metadata of an RNTuple.
A view variant that provides direct access to the I/O buffers.
static ROOT::RField< T > CreateField(ROOT::DescriptorId_t fieldId, ROOT::Internal::RPageSource &pageSource)
Used to loop over indexes (entries or collections) between start and end.
The RNTupleModel encapsulates the schema of an RNTuple.
Common user-tunable settings for reading RNTuples.
bool operator!=(const iterator &rh) const
RIterator(ROOT::NTupleSize_t index)
std::forward_iterator_tag iterator_category
bool operator==(const iterator &rh) const
Reads RNTuple data from storage.
ROOT::RNTupleGlobalRange GetEntryRange()
Returns an iterator over the entry indices of the RNTuple.
ROOT::RNTupleView< T > GetView(ROOT::DescriptorId_t fieldId, T *rawPtr)
Provides access to an individual (sub)field from its on-disk ID, reading its values into rawPtr.
ROOT::RNTupleView< T > GetView(std::string_view fieldName, std::shared_ptr< T > objPtr)
Provides access to an individual (sub)field, reading its values into objPtr.
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show().
ROOT::RNTupleView< T > GetView(ROOT::DescriptorId_t fieldId)
Provides access to an individual (sub)field from its on-disk ID.
std::unique_ptr< ROOT::REntry > CreateEntry()
static std::unique_ptr< RNTupleReader > Open(std::string_view ntupleName, std::string_view storage, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Open an RNTuple for reading.
void InitPageSource(bool enableMetrics)
ROOT::RNTupleDirectAccessView< T > GetDirectAccessView(ROOT::DescriptorId_t fieldId)
Provides direct access to the I/O buffers of a mappable (sub)field from its on-disk ID.
std::unique_ptr< Internal::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if implicit multi-threading (IMT) ...
ROOT::RNTupleView< T > GetView(std::string_view fieldName)
Provides access to an individual (sub)field, e.g.
std::unique_ptr< ROOT::RNTupleModel > fModel
Needs to be destructed before fSource.
const Experimental::Detail::RNTupleMetrics & GetMetrics() const
ROOT::RNTupleCollectionView GetCollectionView(std::string_view fieldName)
Provides access to a collection field, that can itself generate new RNTupleViews for its nested field...
ROOT::RNTupleDirectAccessView< T > GetDirectAccessView(std::string_view fieldName)
Provides direct access to the I/O buffers of a mappable (sub)field.
const ROOT::RNTupleDescriptor & GetDescriptor()
Returns a cached copy of the page source descriptor.
ROOT::RNTupleView< void > GetView(std::string_view fieldName, void *rawPtr, std::string_view typeName)
Provides access to an individual (sub)field, reading its values into rawPtr as the type provided by t...
std::optional< ROOT::RNTupleDescriptor::RCreateModelOptions > fCreateModelOptions
If not nullopt, these will be used when creating the model.
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout) const
Prints a detailed summary of the RNTuple, including a list of fields.
std::unique_ptr< Internal::RPageSource > fSource
RNTupleReader(std::unique_ptr< ROOT::RNTupleModel > model, std::unique_ptr< Internal::RPageSource > source, const ROOT::RNTupleReadOptions &options)
const ROOT::RNTupleModel & GetModel()
ROOT::RNTupleView< void > GetView(ROOT::DescriptorId_t fieldId, void *rawPtr, std::string_view typeName)
Provides access to an individual (sub)field from its on-disk ID, reading its values into rawPtr as th...
std::optional< ROOT::RNTupleDescriptor > fCachedDescriptor
The RNTuple descriptor in the page source is protected by a read-write lock.
ROOT::RNTupleCollectionView GetCollectionView(ROOT::DescriptorId_t fieldId)
Provides access to a collection field from its on-disk ID, that can itself generate new RNTupleViews ...
std::unique_ptr< RNTupleReader > Clone()
void ConnectModel(ROOT::RNTupleModel &model)
ROOT::NTupleSize_t GetNEntries() const
ROOT::RNTupleView< void > GetView(std::string_view fieldName, void *rawPtr, const std::type_info &ti)
Provides access to an individual (sub)field, reading its values into rawPtr as the type provided by t...
ROOT::DescriptorId_t RetrieveFieldId(std::string_view fieldName) const
void Show(ROOT::NTupleSize_t index, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
ROOT::RNTupleView< T > GetView(std::string_view fieldName, T *rawPtr)
Provides access to an individual (sub)field, reading its values into rawPtr.
RNTupleReader * GetDisplayReader()
Experimental::Detail::RNTupleMetrics fMetrics
void EnableMetrics()
Enable performance measurements (decompression time, bytes read from storage, etc....
void LoadEntry(ROOT::NTupleSize_t index, ROOT::REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the RNTuple mode...
ROOT::RNTupleView< void > GetView(ROOT::DescriptorId_t fieldId, void *rawPtr, const std::type_info &ti)
Provides access to an individual (sub)field from its on-disk ID, reading its values into rawPtr as th...
ROOT::RNTupleView< T > GetView(ROOT::DescriptorId_t fieldId, std::shared_ptr< T > objPtr)
Provides access to an individual (sub)field from its on-disk ID, reading its values into objPtr.
void LoadEntry(ROOT::NTupleSize_t index)
Fills the default entry of the model.
static std::unique_ptr< ROOT::RFieldBase > CreateField(ROOT::DescriptorId_t fieldId, Internal::RPageSource &pageSource, std::string_view typeName="")
An RNTupleView for a known type.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:65
ROOT::RNTupleGlobalRange GetFieldRange(const ROOT::RFieldBase &field, const ROOT::Internal::RPageSource &pageSource)
Helper to get the iteration space of the given field that needs to be connected to the given page sou...
std::string GetRenormalizedDemangledTypeName(const std::type_info &ti)
Given a type info ask ROOT meta to demangle it, then renormalize the resulting type name for RNTuple.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
static const char * what
Definition stlLoader.cc:5
static void output()