Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleReader.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleReader.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-20
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleReader
17#define ROOT7_RNTupleReader
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
20#include <ROOT/REntry.hxx>
21#include <ROOT/RError.hxx>
24#include <ROOT/RNTupleModel.hxx>
26#include <ROOT/RNTupleUtil.hxx>
27#include <ROOT/RNTupleView.hxx>
28#include <ROOT/RPageStorage.hxx>
29#include <ROOT/RSpan.hxx>
30
31#include <iostream>
32#include <iterator>
33#include <memory>
34#include <string>
35#include <string_view>
36
37namespace ROOT {
38class RNTuple;
39
40namespace Experimental {
41
42/// Listing of the different options that can be printed by RNTupleReader::PrintInfo()
43enum class ENTupleInfo {
44 kSummary, // The ntuple name, description, number of entries
45 kStorageDetails, // size on storage, page sizes, compression factor, etc.
46 kMetrics, // internal performance counters, requires that EnableMetrics() was called
47};
48
49// clang-format off
50/**
51\class ROOT::Experimental::RNTupleReader
52\ingroup NTuple
53\brief An RNTuple that is used to read data from storage
54
55An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
56or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
57only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
58Individual fields can be read as well by instantiating a tree view.
59
60~~~ {.cpp}
61#include <ROOT/RNTupleReader.hxx>
62using ROOT::Experimental::RNTupleReader;
63
64#include <iostream>
65
66auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
67std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
68~~~
69*/
70// clang-format on
72private:
73 /// Set as the page source's scheduler for parallel page decompression if IMT is on
74 /// Needs to be destructed after the page source is destructed (and thus be declared before)
75 std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks;
76
77 std::unique_ptr<Internal::RPageSource> fSource;
78 /// Needs to be destructed before fSource
79 std::unique_ptr<RNTupleModel> fModel;
80 /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields
81 /// from the full model even if the analysis code uses only a subset of fields. The display reader
82 /// is a clone of the original reader.
83 std::unique_ptr<RNTupleReader> fDisplayReader;
84 /// The ntuple descriptor in the page source is protected by a read-write lock. We don't expose that to the
85 /// users of RNTupleReader::GetDescriptor(). Instead, if descriptor information is needed, we clone the
86 /// descriptor. Using the descriptor's generation number, we know if the cached descriptor is stale.
87 /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes,
88 /// not on a hot code path.
89 std::unique_ptr<RNTupleDescriptor> fCachedDescriptor;
91 /// If not nullopt, these will be used when creating the model
92 std::optional<RNTupleDescriptor::RCreateModelOptions> fCreateModelOptions;
93
94 RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source,
95 const RNTupleReadOptions &options);
96 /// The model is generated from the ntuple metadata on storage.
97 explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source, const RNTupleReadOptions &options);
98
99 void ConnectModel(RNTupleModel &model);
101 void InitPageSource(bool enableMetrics);
102
103 DescriptorId_t RetrieveFieldId(std::string_view fieldName) const;
104
105public:
106 // Forward iterator used to browse through the entries; it wraps the index stored in fIndex
107 class RIterator {
108 private:
110
111 public:
113 using iterator_category = std::forward_iterator_tag;
118
119 RIterator() = default;
121 ~RIterator() = default;
122
123 iterator operator++(int) /* postfix: returns the iterator state from before the increment */
124 {
125 auto r = *this; // copy of the current state, handed back to the caller
126 fIndex++;
127 return r;
128 }
129 iterator &operator++() /* prefix: increments fIndex and returns this iterator */
130 {
131 ++fIndex;
132 return *this;
133 }
135 pointer operator->() { return &fIndex; } // address of the stored index
136 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } // equal iff both hold the same index
137 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } // negation of operator==
138 };
139
140 /// Open an RNTuple for reading.
141 ///
142 /// Throws an RException if there is no RNTuple with the given name.
143 ///
144 /// **Example: open an RNTuple and print the number of entries**
145 /// ~~~ {.cpp}
146 /// #include <ROOT/RNTupleReader.hxx>
147 /// using ROOT::Experimental::RNTupleReader;
148 ///
149 /// #include <iostream>
150 ///
151 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
152 /// std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
153 /// ~~~
154 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage,
155 const RNTupleReadOptions &options = RNTupleReadOptions());
156 static std::unique_ptr<RNTupleReader>
157 Open(const RNTuple &ntuple, const RNTupleReadOptions &options = RNTupleReadOptions());
158
159 /// The caller imposes a model, which must be compatible with the model found in the data on storage.
160 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, std::string_view ntupleName,
161 std::string_view storage,
162 const RNTupleReadOptions &options = RNTupleReadOptions());
163 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, const RNTuple &ntuple,
164 const RNTupleReadOptions &options = RNTupleReadOptions());
165
166 /// The caller imposes the way the model is reconstructed
167 static std::unique_ptr<RNTupleReader> Open(const RNTupleDescriptor::RCreateModelOptions &createModelOpts,
168 std::string_view ntupleName, std::string_view storage,
169 const RNTupleReadOptions &options = RNTupleReadOptions());
170 static std::unique_ptr<RNTupleReader> Open(const RNTupleDescriptor::RCreateModelOptions &createModelOpts,
171 const RNTuple &ntuple,
172 const RNTupleReadOptions &options = RNTupleReadOptions());
173 std::unique_ptr<RNTupleReader> Clone() // returns a new reader constructed from a clone of this reader's page source
174 {
175 auto options = RNTupleReadOptions{}; // starts from default-constructed read options
177 return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone(), options)); // explicit new: the constructor is private
178 }
180
181 NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
182 const RNTupleModel &GetModel();
183 std::unique_ptr<REntry> CreateEntry();
184
185 /// Returns a cached copy of the page source descriptor. The returned pointer remains valid until the next call
186 /// to LoadEntry or to any of the views returned from the reader.
188
189 /// Prints a detailed summary of the ntuple, including a list of fields.
190 ///
191 /// **Example: print summary information to stdout**
192 /// ~~~ {.cpp}
193 /// #include <ROOT/RNTupleReader.hxx>
194 /// using ROOT::Experimental::ENTupleInfo;
195 /// using ROOT::Experimental::RNTupleReader;
196 ///
197 /// #include <iostream>
198 ///
199 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
200 /// ntuple->PrintInfo();
201 /// // or, equivalently:
202 /// ntuple->PrintInfo(ENTupleInfo::kSummary, std::cout);
203 /// ~~~
204 /// **Example: print detailed column storage data to stderr**
205 /// ~~~ {.cpp}
206 /// #include <ROOT/RNTupleReader.hxx>
207 /// using ROOT::Experimental::ENTupleInfo;
208 /// using ROOT::Experimental::RNTupleReader;
209 ///
210 /// #include <iostream>
211 ///
212 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
213 /// ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cerr);
214 /// ~~~
215 ///
216 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
217 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout) const;
218
219 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
220 /// prints the output in JSON format.
221 /// Uses the visitor pattern to traverse through each field of the given entry.
222 void Show(NTupleSize_t index, std::ostream &output = std::cout);
223
224 /// Analogous to Fill(), fills the default entry of the model. Returns false at the end of the ntuple.
225 /// On I/O errors, raises an exception.
227 {
228 // TODO(jblomer): can be templated depending on the factory method / constructor
229 if (R__unlikely(!fModel)) {
230 fModel = fSource->GetSharedDescriptorGuard()->CreateModel(
233 }
234 LoadEntry(index, fModel->GetDefaultEntry());
235 }
236 /// Fills a user provided entry after checking that the entry has been instantiated from the ntuple model
238
239 /// Returns an iterator over the entry indices of the RNTuple.
240 ///
241 /// **Example: iterate over all entries and print each entry in JSON format**
242 /// ~~~ {.cpp}
243 /// #include <ROOT/RNTupleReader.hxx>
244 /// using ROOT::Experimental::ENTupleShowFormat;
245 /// using ROOT::Experimental::RNTupleReader;
246 ///
247 /// #include <iostream>
248 ///
249 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
250 /// for (auto i : ntuple->GetEntryRange()) {
251 /// ntuple->Show(i);
252 /// }
253 /// ~~~
255
256 /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
257 /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
258 /// field of a collection itself, like GetView<NTupleSize_t>("particle").
259 ///
260 /// Raises an exception if there is no field with the given name.
261 ///
262 /// **Example: iterate over a field named "pt" of type `float`**
263 /// ~~~ {.cpp}
264 /// #include <ROOT/RNTupleReader.hxx>
265 /// using ROOT::Experimental::RNTupleReader;
266 ///
267 /// #include <iostream>
268 ///
269 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
270 /// auto pt = ntuple->GetView<float>("pt");
271 ///
272 /// for (auto i : ntuple->GetEntryRange()) {
273 /// std::cout << i << ": " << pt(i) << "\n";
274 /// }
275 /// ~~~
276 template <typename T>
278 {
280 }
281
282 template <typename T>
283 RNTupleView<T> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr)
284 {
286 }
287
288 template <typename T>
290 {
292 }
293
294 template <typename T>
301
302 template <typename T>
309
310 template <typename T>
317
318 template <typename T>
323
324 template <typename T>
331
332 /// Raises an exception if:
333 /// * there is no field with the given name or,
334 /// * the field is not a collection
336 {
337 auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName);
339 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" +
340 fSource->GetSharedDescriptorGuard()->GetName() + "'"));
341 }
343 }
344
349
350 RIterator begin() { return RIterator(0); }
352
353 /// Enable performance measurements (decompression time, bytes read from storage, etc.)
354 ///
355 /// **Example: inspect the reader metrics after loading every entry**
356 /// ~~~ {.cpp}
357 /// #include <ROOT/RNTupleReader.hxx>
358 /// using ROOT::Experimental::ENTupleInfo;
359 /// using ROOT::Experimental::RNTupleReader;
360 ///
361 /// #include <iostream>
362 ///
363 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
364 /// // metrics must be turned on beforehand
365 /// ntuple->EnableMetrics();
366 ///
367 /// for (auto i : ntuple->GetEntryRange()) {
368 /// ntuple->LoadEntry(i);
369 /// }
370 /// ntuple->PrintInfo(ENTupleInfo::kMetrics);
371 /// ~~~
373 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
374}; // class RNTupleReader
375
376} // namespace Experimental
377} // namespace ROOT
378
379#endif // ROOT7_RNTupleReader
#define R__unlikely(expr)
Definition RConfig.hxx:602
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A collection of Counter objects with a name, a unit, and a description.
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:51
A view for a collection, that can itself generate new ntuple views for its nested fields.
static RNTupleCollectionView Create(DescriptorId_t fieldId, Internal::RPageSource *source)
The on-storage meta-data of an ntuple.
static RField< T > CreateField(DescriptorId_t fieldId, Internal::RPageSource &pageSource)
Used to loop over indexes (entries or collections) between start and end.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
bool operator!=(const iterator &rh) const
bool operator==(const iterator &rh) const
An RNTuple that is used to read data from storage.
RNTupleView< T > GetView(std::string_view fieldName, T *rawPtr)
DescriptorId_t RetrieveFieldId(std::string_view fieldName) const
void Show(NTupleSize_t index, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
RNTupleDirectAccessView< T > GetDirectAccessView(DescriptorId_t fieldId)
std::unique_ptr< RNTupleReader > Clone()
Detail::RNTupleMetrics fMetrics
RNTupleReader(std::unique_ptr< RNTupleModel > model, std::unique_ptr< Internal::RPageSource > source, const RNTupleReadOptions &options)
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show() and Scan().
void EnableMetrics()
Enable performance measurements (decompression time, bytes read from storage, etc....
const Detail::RNTupleMetrics & GetMetrics() const
std::unique_ptr< REntry > CreateEntry()
RNTupleView< T > GetView(DescriptorId_t fieldId, std::shared_ptr< T > objPtr)
RNTupleView< T > GetView(std::string_view fieldName)
Provides access to an individual field that can contain either a scalar value or a collection,...
RNTupleView< T > GetView(std::string_view fieldName, std::shared_ptr< T > objPtr)
const RNTupleDescriptor & GetDescriptor()
Returns a cached copy of the page source descriptor.
std::optional< RNTupleDescriptor::RCreateModelOptions > fCreateModelOptions
If not nullopt, these will be used when creating the model.
std::unique_ptr< RNTupleDescriptor > fCachedDescriptor
The ntuple descriptor in the page source is protected by a read-write lock.
RNTupleCollectionView GetCollectionView(std::string_view fieldName)
Raises an exception if:
std::unique_ptr< Internal::RPageSource > fSource
RNTupleDirectAccessView< T > GetDirectAccessView(std::string_view fieldName)
RNTupleCollectionView GetCollectionView(DescriptorId_t fieldId)
static std::unique_ptr< RNTupleReader > Open(std::string_view ntupleName, std::string_view storage, const RNTupleReadOptions &options=RNTupleReadOptions())
Open an RNTuple for reading.
RNTupleGlobalRange GetEntryRange()
Returns an iterator over the entry indices of the RNTuple.
std::unique_ptr< Internal::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if IMT is on Needs to be destructe...
void InitPageSource(bool enableMetrics)
RNTupleView< T > GetView(DescriptorId_t fieldId)
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout) const
Prints a detailed summary of the ntuple, including a list of fields.
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
void LoadEntry(NTupleSize_t index, REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the ntuple model...
void ConnectModel(RNTupleModel &model)
RNTupleView< T > GetView(DescriptorId_t fieldId, T *rawPtr)
static std::unique_ptr< RFieldBase > CreateField(DescriptorId_t fieldId, Internal::RPageSource &pageSource)
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69
RNTupleGlobalRange GetFieldRange(const RFieldBase &field, const RPageSource &pageSource)
Helper to get the iteration space of the given field that needs to be connected to the given page sou...
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static const char * what
Definition stlLoader.cc:5
static void output()