Logo ROOT  
Reference Guide
RNTuple.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTuple.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTuple
17#define ROOT7_RNTuple
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
21#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RNTupleView.hxx>
25#include <ROOT/RPageStorage.hxx>
26#include <ROOT/RSpan.hxx>
27#include <ROOT/RStringView.hxx>
28
29#include <iterator>
30#include <memory>
31#include <sstream>
32#include <utility>
33
34class TFile;
35
36namespace ROOT {
37namespace Experimental {
38
39class REntry;
40class RNTupleModel;
41
42namespace Detail {
43class RPageSink;
44class RPageSource;
45}
46
47
48/**
49 * Listing of the different options that can be printed by RNTupleReader::PrintInfo()
50 */
51enum class ENTupleInfo {
52 kSummary, // The ntuple name, description, number of entries
53 kStorageDetails, // size on storage, page sizes, compression factor, etc.
54 kMetrics, // internal performance counters, requires that EnableMetrics() was called
55};
56
57/**
58 * Listing of the different entry output formats of RNTupleReader::Show()
59 */
61 kCurrentModelJSON, // prints a single entry/row with the current active model in JSON format.
62 kCompleteJSON, // prints a single entry/row with all the fields in JSON format.
63};
64
65
66#ifdef R__USE_IMT
67class TTaskGroup;
69private:
70 std::unique_ptr<TTaskGroup> fTaskGroup;
71public:
73 virtual ~RNTupleImtTaskScheduler() = default;
74 void Reset() final;
75 void AddTask(const std::function<void(void)> &taskFunc) final;
76 void Wait() final;
77};
78#endif
79
80// clang-format off
81/**
82\class ROOT::Experimental::RNTupleReader
83\ingroup NTuple
84\brief An RNTuple that is used to read data from storage
85
86An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
87or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
88only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
89Individual fields can be read as well by instantiating a view (see GetView()).
90
91~~~ {.cpp}
92#include <ROOT/RNTuple.hxx>
93using ROOT::Experimental::RNTupleReader;
94
95#include <iostream>
96
97auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
98std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
99~~~
100*/
101// clang-format on
103private:
104 /// Set as the page source's scheduler for parallel page decompression if IMT is on.
105 /// Needs to be destructed after the page source is destructed (and thus be declared before).
106 std::unique_ptr<Detail::RPageStorage::RTaskScheduler> fUnzipTasks;
107
108 std::unique_ptr<Detail::RPageSource> fSource;
109 /// Needs to be destructed before fSource
110 std::unique_ptr<RNTupleModel> fModel;
111 /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields
112 /// from the full model even if the analysis code uses only a subset of fields. The display reader
113 /// is a clone of the original reader.
114 std::unique_ptr<RNTupleReader> fDisplayReader;
116
117 void ConnectModel(const RNTupleModel &model);
118 RNTupleReader *GetDisplayReader();
119 void InitPageSource();
120
121public:
122 // Browse through the entries
123 class RIterator {
124 private:
126 public:
128 using iterator_category = std::forward_iterator_tag;
133
134 RIterator() = default;
135 explicit RIterator(NTupleSize_t index) : fIndex(index) {}
136 ~RIterator() = default;
137
138 iterator operator++(int) /* postfix */ { auto r = *this; fIndex++; return r; }
139 iterator& operator++() /* prefix */ { ++fIndex; return *this; }
140 reference operator* () { return fIndex; }
141 pointer operator->() { return &fIndex; }
142 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
143 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
144 };
145
146 /// Used to specify the underlying RNTuples in OpenFriends()
147 struct ROpenSpec {
148 std::string fNTupleName;
149 std::string fStorage;
151
152 ROpenSpec() = default;
153 ROpenSpec(std::string_view n, std::string_view s) : fNTupleName(n), fStorage(s) {}
154 };
155
156 /// Throws an exception if the model is null.
157 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model,
158 std::string_view ntupleName,
159 std::string_view storage,
160 const RNTupleReadOptions &options = RNTupleReadOptions());
161 /// Open an RNTuple for reading.
162 ///
163 /// Throws an RException if there is no RNTuple with the given name.
164 ///
165 /// **Example: open an RNTuple and print the number of entries**
166 /// ~~~ {.cpp}
167 /// #include <ROOT/RNTuple.hxx>
168 /// using ROOT::Experimental::RNTupleReader;
169 ///
170 /// #include <iostream>
171 ///
172 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
173 /// std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
174 /// ~~~
175 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName,
176 std::string_view storage,
177 const RNTupleReadOptions &options = RNTupleReadOptions());
178 /// Open RNTuples as one virtual, horizontally combined ntuple. The underlying RNTuples must
179 /// have an identical number of entries. Fields in the combined RNTuple are named with the ntuple name
180 /// as a prefix, e.g. myNTuple1.px and myNTuple2.pt (see tutorial ntpl006_friends)
181 static std::unique_ptr<RNTupleReader> OpenFriends(std::span<ROpenSpec> ntuples);
182
183 /// The user imposes an ntuple model, which must be compatible with the model found in the data on
184 /// storage.
185 ///
186 /// Throws an exception if the model or the source is null.
187 RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSource> source);
188 /// The model is generated from the ntuple metadata on storage
189 ///
190 /// Throws an exception if the source is null.
191 explicit RNTupleReader(std::unique_ptr<Detail::RPageSource> source);
192 std::unique_ptr<RNTupleReader> Clone() { return std::make_unique<RNTupleReader>(fSource->Clone()); }
194
195 RNTupleModel *GetModel();
196 NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
197 const RNTupleDescriptor &GetDescriptor() const { return fSource->GetDescriptor(); }
198
199 /// Prints a detailed summary of the ntuple, including a list of fields.
200 ///
201 /// **Example: print summary information to stdout**
202 /// ~~~ {.cpp}
203 /// #include <ROOT/RNTuple.hxx>
204 /// using ROOT::Experimental::ENTupleInfo;
205 /// using ROOT::Experimental::RNTupleReader;
206 ///
207 /// #include <iostream>
208 ///
209 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
210 /// ntuple->PrintInfo();
211 /// // or, equivalently:
212 /// ntuple->PrintInfo(ENTupleInfo::kSummary, std::cout);
213 /// ~~~
214 /// **Example: print detailed column storage data to stderr**
215 /// ~~~ {.cpp}
216 /// #include <ROOT/RNTuple.hxx>
217 /// using ROOT::Experimental::ENTupleInfo;
218 /// using ROOT::Experimental::RNTupleReader;
219 ///
220 /// #include <iostream>
221 ///
222 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
223 /// ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cerr);
224 /// ~~~
225 ///
226 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
227 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout);
228
229 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
230 /// prints the output in JSON format.
231 /// Uses the visitor pattern to traverse through each field of the given entry.
233 std::ostream &output = std::cout);
234
235 /// Analogous to Fill(), fills the default entry of the model. Does not return a value; if no model is set yet,
236 /// one is generated from the on-storage descriptor and connected first. On I/O errors, raises an exception.
238 // TODO(jblomer): can be templated depending on the factory method / constructor
239 if (R__unlikely(!fModel)) {
240 fModel = fSource->GetDescriptor().GenerateModel();
241 ConnectModel(*fModel);
242 }
243 LoadEntry(index, *fModel->GetDefaultEntry());
244 }
245 /// Fills a user provided entry by reading each of its values from the value's field at the given index; this method itself does not check that the entry has been instantiated from the ntuple model
246 void LoadEntry(NTupleSize_t index, REntry &entry) {
247 for (auto& value : entry) {
248 value.GetField()->Read(index, &value);
249 }
250 }
251
252 /// Returns an iterator over the entry indices of the RNTuple.
253 ///
254 /// **Example: iterate over all entries and print each entry in JSON format**
255 /// ~~~ {.cpp}
256 /// #include <ROOT/RNTuple.hxx>
257 /// using ROOT::Experimental::ENTupleShowFormat;
258 /// using ROOT::Experimental::RNTupleReader;
259 ///
260 /// #include <iostream>
261 ///
262 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
263 /// for (auto i : ntuple->GetEntryRange()) {
264 /// ntuple->Show(i, ENTupleShowFormat::kCompleteJSON);
265 /// }
266 /// ~~~
268
269 /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
270 /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
271 /// field of a collection itself, like GetView<NTupleSize_t>("particle").
272 ///
273 /// Raises an exception if there is no field with the given name.
274 ///
275 /// **Example: iterate over a field named "pt" of type `float`**
276 /// ~~~ {.cpp}
277 /// #include <ROOT/RNTuple.hxx>
278 /// using ROOT::Experimental::RNTupleReader;
279 ///
280 /// #include <iostream>
281 ///
282 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
283 /// auto pt = ntuple->GetView<float>("pt");
284 ///
285 /// for (auto i : ntuple->GetEntryRange()) {
286 /// std::cout << i << ": " << pt(i) << "\n";
287 /// }
288 /// ~~~
289 template <typename T>
291 auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName);
292 if (fieldId == kInvalidDescriptorId) {
293 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '"
294 + fSource->GetDescriptor().GetName() + "'"
295 ));
296 }
297 return RNTupleView<T>(fieldId, fSource.get());
298 }
299
300 /// Raises an exception if:
301 /// * there is no field with the given name or,
302 /// * the field is not a collection
304 auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName);
305 if (fieldId == kInvalidDescriptorId) {
306 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '"
307 + fSource->GetDescriptor().GetName() + "'"
308 ));
309 }
310 return RNTupleViewCollection(fieldId, fSource.get());
311 }
312
313 RIterator begin() { return RIterator(0); }
314 RIterator end() { return RIterator(GetNEntries()); }
315
316 /// Enable performance measurements (decompression time, bytes read from storage, etc.)
317 ///
318 /// **Example: inspect the reader metrics after loading every entry**
319 /// ~~~ {.cpp}
320 /// #include <ROOT/RNTuple.hxx>
321 /// using ROOT::Experimental::ENTupleInfo;
322 /// using ROOT::Experimental::RNTupleReader;
323 ///
324 /// #include <iostream>
325 ///
326 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
327 /// // metrics must be turned on beforehand
328 /// ntuple->EnableMetrics();
329 ///
330 /// for (auto i : ntuple->GetEntryRange()) {
331 /// ntuple->LoadEntry(i);
332 /// }
333 /// ntuple->PrintInfo(ENTupleInfo::kMetrics);
334 /// ~~~
335 void EnableMetrics() { fMetrics.Enable(); }
336 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
337};
338
339// clang-format off
340/**
341\class ROOT::Experimental::RNTupleWriter
342\ingroup NTuple
343\brief An RNTuple that gets filled with entries (data) and writes them to storage
344
345An output ntuple can be filled with entries. The caller has to make sure that the data that gets filled into an ntuple
346is not modified for the time of the Fill() call. The fill call serializes the C++ object into the column format and
347writes data into the corresponding column page buffers. Writing of the buffers to storage is deferred and can be
348triggered by CommitCluster() or by destructing the ntuple. On I/O errors, an exception is thrown.
349*/
350// clang-format on
352private:
353 /// The page sink's parallel page compression scheduler if IMT is on.
354 /// Needs to be destructed after the page sink is destructed and so declared before.
355 std::unique_ptr<Detail::RPageStorage::RTaskScheduler> fZipTasks;
356 std::unique_ptr<Detail::RPageSink> fSink;
357 /// Needs to be destructed before fSink
358 std::unique_ptr<RNTupleModel> fModel;
362
363public:
364 /// Throws an exception if the model is null.
365 static std::unique_ptr<RNTupleWriter> Recreate(std::unique_ptr<RNTupleModel> model,
366 std::string_view ntupleName,
367 std::string_view storage,
368 const RNTupleWriteOptions &options = RNTupleWriteOptions());
369 /// Throws an exception if the model is null.
370 static std::unique_ptr<RNTupleWriter> Append(std::unique_ptr<RNTupleModel> model,
371 std::string_view ntupleName,
372 TFile &file,
373 const RNTupleWriteOptions &options = RNTupleWriteOptions());
374 /// Throws an exception if the model or the sink is null.
375 RNTupleWriter(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSink> sink);
376 RNTupleWriter(const RNTupleWriter&) = delete;
379
380 /// The simplest user interface if the default entry that comes with the ntuple model is used
381 void Fill() { Fill(*fModel->GetDefaultEntry()); }
382 /// Multiple entries can have been instantiated from the ntuple model. This overload appends every value of the given
383 /// entry to its field (no model check is done in this method body) and commits a cluster whenever the number of filled entries is a multiple of GetNEntriesPerCluster()
384 void Fill(REntry &entry) {
385 for (auto& value : entry) {
386 value.GetField()->Append(value);
387 }
388 fNEntries++;
389 if ((fNEntries % fSink->GetWriteOptions().GetNEntriesPerCluster()) == 0)
390 CommitCluster();
391 }
392 /// Ensure that the data from the so far seen Fill calls has been written to storage
393 void CommitCluster();
394
395 std::unique_ptr<REntry> CreateEntry() { return fModel->CreateEntry(); }
396
397 void EnableMetrics() { fMetrics.Enable(); }
398 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
399};
400
401// clang-format off
402/**
403\class ROOT::Experimental::RCollectionNTupleWriter
404\ingroup NTuple
405\brief A virtual ntuple used for writing untyped collections that can be used to some extent like an RNTupleWriter
406*
407* This class is between a field and an ntuple. It carries the offset column for the collection and the default entry
408* taken from the collection model. It does not, however, own an ntuple model because the collection model has been
409* merged into the larger ntuple model.
410*/
411// clang-format on
413private:
415 std::unique_ptr<REntry> fDefaultEntry;
416public:
417 explicit RCollectionNTupleWriter(std::unique_ptr<REntry> defaultEntry);
421
422 void Fill() { Fill(fDefaultEntry.get()); }
423 void Fill(REntry *entry) {
424 for (auto &value : *entry) {
425 value.GetField()->Append(value);
426 }
427 fOffset++;
428 }
429
430 ClusterSize_t *GetOffsetPtr() { return &fOffset; }
431};
432
433} // namespace Experimental
434} // namespace ROOT
435
436#endif
ROOT::R::TRInterface & r
Definition: Object.C:4
#define R__unlikely(expr)
Definition: RConfig.hxx:597
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition: RError.hxx:291
A collection of Counter objects with a name, a unit, and a description.
The interface of a task scheduler to schedule page (de)compression tasks.
RCollectionNTupleWriter(const RCollectionNTupleWriter &)=delete
RCollectionNTupleWriter & operator=(const RCollectionNTupleWriter &)=delete
std::unique_ptr< REntry > fDefaultEntry
Definition: RNTuple.hxx:415
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition: REntry.hxx:42
Base class for all ROOT issued exceptions.
Definition: RError.hxx:114
The on-storage meta-data of an ntuple.
Used to loop over indexes (entries or collections) between start and end.
Definition: RNTupleView.hxx:40
void Reset() final
Start a new set of tasks.
Definition: RNTuple.cxx:48
std::unique_ptr< TTaskGroup > fTaskGroup
Definition: RNTuple.hxx:70
void AddTask(const std::function< void(void)> &taskFunc) final
Take a callable that represents a task.
Definition: RNTuple.cxx:54
void Wait() final
Blocks until all scheduled tasks finished.
Definition: RNTuple.cxx:60
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
bool operator!=(const iterator &rh) const
Definition: RNTuple.hxx:143
bool operator==(const iterator &rh) const
Definition: RNTuple.hxx:142
std::forward_iterator_tag iterator_category
Definition: RNTuple.hxx:128
An RNTuple that is used to read data from storage.
Definition: RNTuple.hxx:102
std::unique_ptr< RNTupleReader > Clone()
Definition: RNTuple.hxx:192
Detail::RNTupleMetrics fMetrics
Definition: RNTuple.hxx:115
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if IMT is on Needs to be destructe...
Definition: RNTuple.hxx:106
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show() and Scan().
Definition: RNTuple.hxx:114
void EnableMetrics()
Enable performance measurements (decompression time, bytes read from storage, etc....
Definition: RNTuple.hxx:335
const Detail::RNTupleMetrics & GetMetrics() const
Definition: RNTuple.hxx:336
const RNTupleDescriptor & GetDescriptor() const
Definition: RNTuple.hxx:197
RNTupleView< T > GetView(std::string_view fieldName)
Provides access to an individual field that can contain either a scalar value or a collection,...
Definition: RNTuple.hxx:290
NTupleSize_t GetNEntries() const
Definition: RNTuple.hxx:196
std::unique_ptr< Detail::RPageSource > fSource
Definition: RNTuple.hxx:108
RNTupleGlobalRange GetEntryRange()
Returns an iterator over the entry indices of the RNTuple.
Definition: RNTuple.hxx:267
RNTupleViewCollection GetViewCollection(std::string_view fieldName)
Raises an exception if:
Definition: RNTuple.hxx:303
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
Definition: RNTuple.hxx:237
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
Definition: RNTuple.hxx:110
void LoadEntry(NTupleSize_t index, REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the ntuple model...
Definition: RNTuple.hxx:246
A view for a collection, that can itself generate new ntuple views for its nested fields.
An RNTupleView provides read-only access to a single field of the ntuple.
Common user-tunable settings for storing ntuples.
An RNTuple that gets filled with entries (data) and writes them to storage.
Definition: RNTuple.hxx:351
const Detail::RNTupleMetrics & GetMetrics() const
Definition: RNTuple.hxx:398
std::unique_ptr< REntry > CreateEntry()
Definition: RNTuple.hxx:395
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSink.
Definition: RNTuple.hxx:358
RNTupleWriter(const RNTupleWriter &)=delete
Detail::RNTupleMetrics fMetrics
Definition: RNTuple.hxx:359
RNTupleWriter & operator=(const RNTupleWriter &)=delete
std::unique_ptr< Detail::RPageSink > fSink
Definition: RNTuple.hxx:356
void Fill()
The simplest user interface if the default entry that comes with the ntuple model is used.
Definition: RNTuple.hxx:381
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fZipTasks
The page sink's parallel page compression scheduler if IMT is on.
Definition: RNTuple.hxx:355
void Fill(REntry &entry)
Multiple entries can have been instantiated from the ntuple model.
Definition: RNTuple.hxx:384
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
Definition: RNTuple.hxx:51
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:77
constexpr NTupleSize_t kInvalidNTupleIndex
Definition: RNTupleUtil.hxx:78
REveVectorT< TT > operator*(const REveVectorT< TT > &a, TT b)
Definition: REveVector.hxx:218
ENTupleShowFormat
Listing of the different entry output formats of RNTupleReader::Show()
Definition: RNTuple.hxx:60
constexpr DescriptorId_t kInvalidDescriptorId
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:152
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static constexpr double s
Definition: file.py:1
static const char * what
Definition: stlLoader.cc:6
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Definition: RNTupleUtil.hxx:80
Used to specify the underlying RNTuples in OpenFriends()
Definition: RNTuple.hxx:147
ROpenSpec(std::string_view n, std::string_view s)
Definition: RNTuple.hxx:153
static void output(int code)
Definition: gifencode.c:226