Logo ROOT  
Reference Guide
RNTuple.hxx
Go to the documentation of this file.
1 /// \file ROOT/RNTuple.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2018-10-04
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RNTuple
17 #define ROOT7_RNTuple
18 
19 #include <ROOT/RConfig.hxx> // for R__unlikely
20 #include <ROOT/RNTupleMetrics.hxx>
21 #include <ROOT/RNTupleModel.hxx>
22 #include <ROOT/RNTupleOptions.hxx>
23 #include <ROOT/RNTupleUtil.hxx>
24 #include <ROOT/RNTupleView.hxx>
25 #include <ROOT/RPageStorage.hxx>
26 #include <ROOT/RStringView.hxx>
27 
28 #include <iterator>
29 #include <memory>
30 #include <sstream>
31 #include <utility>
32 
33 class TFile;
34 
35 namespace ROOT {
36 namespace Experimental {
37 
38 class REntry;
39 class RNTupleModel;
40 
41 namespace Detail {
42 class RPageSink;
43 class RPageSource;
44 }
45 
46 
47 /**
48  * Listing of the different options that can be printed by RNTupleReader::PrintInfo()
49  */
50 enum class ENTupleInfo {
51  kSummary, // The ntuple name, description, and number of entries
52  kStorageDetails, // Size on storage, page sizes, compression factor, etc.
53  kMetrics, // Internal performance counters; requires that EnableMetrics() was called
54 };
55 
56 /**
57  * Listing of the different entry output formats of RNTupleReader::Show()
58  */
59 enum class ENTupleShowFormat {
60  kCurrentModelJSON, // Prints a single entry/row in JSON format, using the currently active model.
61  kCompleteJSON, // Prints a single entry/row in JSON format, including all the fields.
62 };
63 
64 
65 #ifdef R__USE_IMT
66 class TTaskGroup;
68 private:
69  std::unique_ptr<TTaskGroup> fTaskGroup;
70 public:
71  virtual ~RNTupleImtTaskScheduler() = default;
72  void Reset() final;
73  void AddTask(const std::function<void(void)> &taskFunc) final;
74  void Wait() final;
75 };
76 #endif
77 
78 // clang-format off
79 /**
80 \class ROOT::Experimental::RNTupleReader
81 \ingroup NTuple
82 \brief An RNTuple that is used to read data from storage
83 
84 An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
85 or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
86 only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
87 Individual fields can be read as well by instantiating a tree view.
88 */
89 // clang-format on
91 private:
92  /// Set as the page source's scheduler for parallel page decompression if IMT is on.
93  /// Needs to be destructed after the page source is destructed (and thus be declared before)
94  std::unique_ptr<Detail::RPageStorage::RTaskScheduler> fUnzipTasks;
95 
96  std::unique_ptr<Detail::RPageSource> fSource;
97  /// Needs to be destructed before fSource
98  std::unique_ptr<RNTupleModel> fModel;
99  /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields
100  /// from the full model even if the analysis code uses only a subset of fields. The display reader
101  /// is a clone of the original reader.
102  std::unique_ptr<RNTupleReader> fDisplayReader;
104 
105  void ConnectModel(const RNTupleModel &model);
106  RNTupleReader *GetDisplayReader();
107  void InitPageSource();
108 
109 public:
110  // Browse through the entries
111  class RIterator {
112  private:
114  public:
116  using iterator_category = std::forward_iterator_tag;
121 
122  RIterator() = default;
123  explicit RIterator(NTupleSize_t index) : fIndex(index) {}
124  ~RIterator() = default;
125 
126  iterator operator++(int) /* postfix */ { auto r = *this; fIndex++; return r; }
127  iterator& operator++() /* prefix */ { ++fIndex; return *this; }
128  reference operator* () { return fIndex; }
129  pointer operator->() { return &fIndex; }
130  bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
131  bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
132  };
133 
134 
135  static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model,
136  std::string_view ntupleName,
137  std::string_view storage,
138  const RNTupleReadOptions &options = RNTupleReadOptions());
139  static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName,
140  std::string_view storage,
141  const RNTupleReadOptions &options = RNTupleReadOptions());
142 
143  /// The user imposes an ntuple model, which must be compatible with the model found in the data on storage
144  RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSource> source);
145  /// The model is generated from the ntuple metadata on storage
146  explicit RNTupleReader(std::unique_ptr<Detail::RPageSource> source);
147  std::unique_ptr<RNTupleReader> Clone() { return std::make_unique<RNTupleReader>(fSource->Clone()); }
149 
150  RNTupleModel *GetModel();
151  NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
152  const RNTupleDescriptor &GetDescriptor() const { return fSource->GetDescriptor(); }
153 
154  /// Prints a detailed summary of the ntuple, including a list of fields.
155  void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout);
156 
157  /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
158  /// prints the output in JSON format.
159  /// Uses the visitor pattern to traverse through each field of the given entry.
161  std::ostream &output = std::cout);
162 
163  /// Analogous to Fill(), loads the entry with the given index into the default entry of the model.
164  /// Does not return a value (the return type is void). On I/O errors, raises an exception.
165  void LoadEntry(NTupleSize_t index) { LoadEntry(index, *fModel->GetDefaultEntry()); } // NOTE(review): fModel is dereferenced here before the two-argument overload can create it lazily; confirm fModel is never null on this path
166  /// Fills a user provided entry after checking that the entry has been instantiated from the ntuple model
167  void LoadEntry(NTupleSize_t index, REntry &entry) { // Reads entry number `index` into every value held by `entry`
168  // TODO(jblomer): can be templated depending on the factory method / constructor
169  if (R__unlikely(!fModel)) { // No model yet: create it on first use
170  fModel = fSource->GetDescriptor().GenerateModel(); // Model is generated from the page source's descriptor
171  ConnectModel(*fModel);
172  }
173 
174  for (auto& value : entry) { // NOTE(review): no check that `entry` belongs to fModel is visible in this body - confirm where it happens
175  value.GetField()->Read(index, &value); // Each value is read through the field it reports via GetField()
176  }
177  }
178 
179  RNTupleGlobalRange GetEntryRange() { return RNTupleGlobalRange(0, GetNEntries()); }
180 
181  /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
182  /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
183  /// field of a collection itself, like GetView<NTupleSize_t>("particle")
184  template <typename T>
186  auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName);
187  return RNTupleView<T>(fieldId, fSource.get());
188  }
190  auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName);
191  return RNTupleViewCollection(fieldId, fSource.get());
192  }
193 
194  RIterator begin() { return RIterator(0); }
195  RIterator end() { return RIterator(GetNEntries()); }
196 
197  void EnableMetrics() { fMetrics.Enable(); }
198  const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
199 };
200 
201 // clang-format off
202 /**
203 \class ROOT::Experimental::RNTupleWriter
204 \ingroup NTuple
205 \brief An RNTuple that gets filled with entries (data) and writes them to storage
206 
207 An output ntuple can be filled with entries. The caller has to make sure that the data that gets filled into an ntuple
208 is not modified for the time of the Fill() call. The fill call serializes the C++ object into the column format and
209 writes data into the corresponding column page buffers. Writing of the buffers to storage is deferred and can be
210 triggered by Flush() or by destructing the ntuple. On I/O errors, an exception is thrown.
211 */
212 // clang-format on
214 private:
215  static constexpr NTupleSize_t kDefaultClusterSizeEntries = 64000;
216  std::unique_ptr<Detail::RPageSink> fSink;
217  /// Needs to be destructed before fSink
218  std::unique_ptr<RNTupleModel> fModel;
222 
223 public:
224  static std::unique_ptr<RNTupleWriter> Recreate(std::unique_ptr<RNTupleModel> model,
225  std::string_view ntupleName,
226  std::string_view storage,
227  const RNTupleWriteOptions &options = RNTupleWriteOptions());
228  static std::unique_ptr<RNTupleWriter> Append(std::unique_ptr<RNTupleModel> model,
229  std::string_view ntupleName,
230  TFile &file,
231  const RNTupleWriteOptions &options = RNTupleWriteOptions());
232  RNTupleWriter(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSink> sink);
233  RNTupleWriter(const RNTupleWriter&) = delete;
235  ~RNTupleWriter();
236 
237  /// The simplest user interface if the default entry that comes with the ntuple model is used
238  void Fill() { Fill(*fModel->GetDefaultEntry()); }
239  /// Multiple entries can have been instantiated from the ntuple model. This method will perform
240  /// a light check whether the entry comes from the ntuple's own model
241  void Fill(REntry &entry) { // Appends every value of `entry` to its field and counts the entry
242  for (auto& value : entry) { // NOTE(review): the "light check" against the ntuple's own model is not visible in this body - confirm
243  value.GetField()->Append(value); // Each value is appended through the field it reports via GetField()
244  }
245  fNEntries++;
246  if ((fNEntries % fClusterSizeEntries) == 0) // Commit a cluster each time fNEntries reaches a multiple of fClusterSizeEntries
247  CommitCluster();
248  }
249  /// Ensure that the data from all Fill() calls seen so far has been written to storage
250  void CommitCluster();
251 };
252 
253 // clang-format off
254 /**
255 \class ROOT::Experimental::RCollectionNTuple
256 \ingroup NTuple
257 \brief A virtual ntuple for collections that can be used to some extent like a real ntuple
258 *
259 * This class is between a field and a ntuple. It carries the offset column for the collection and the default entry
260 * taken from the collection model. It does not, however, have a tree model because the collection model has been merged
261 * into the larger ntuple model.
262 */
263 // clang-format on
265 private:
267  std::unique_ptr<REntry> fDefaultEntry;
268 public:
269  explicit RCollectionNTuple(std::unique_ptr<REntry> defaultEntry);
272  ~RCollectionNTuple() = default;
273 
274  void Fill() { Fill(fDefaultEntry.get()); }
275  void Fill(REntry *entry) { // Appends every value of `*entry` to its field, then advances fOffset by one
276  for (auto& treeValue : *entry) { // `entry` is dereferenced without a null check
277  treeValue.GetField()->Append(treeValue); // Each value is appended through the field it reports via GetField()
278  }
279  fOffset++; // One more element filled; fOffset is what GetOffsetPtr() exposes to callers
280  }
281 
282  ClusterSize_t* GetOffsetPtr() { return &fOffset; }
283 };
284 
285 } // namespace Experimental
286 } // namespace ROOT
287 
288 #endif
ROOT::Experimental::RNTupleWriteOptions
Common user-tunable settings for storing ntuples.
Definition: RNTupleOptions.hxx:46
ROOT::Experimental::RCollectionNTuple
A virtual ntuple for collections that can be used to some extent like a real ntuple.
Definition: RNTuple.hxx:264
ROOT::Experimental::RNTupleReader::~RNTupleReader
~RNTupleReader()
ROOT::Experimental::RNTupleWriter::Fill
void Fill(REntry &entry)
Multiple entries can have been instantiated from the ntuple model.
Definition: RNTuple.hxx:241
ROOT::Experimental::ENTupleInfo
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
Definition: RNTuple.hxx:50
ROOT::Experimental::RNTupleReader::GetView
RNTupleView< T > GetView(std::string_view fieldName)
Provides access to an individual field that can contain either a scalar value or a collection,...
Definition: RNTuple.hxx:185
RNTupleMetrics.hxx
RNTupleModel.hxx
ROOT::Experimental::RNTupleReader::LoadEntry
void LoadEntry(NTupleSize_t index, REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the ntuple model...
Definition: RNTuple.hxx:167
ROOT::Experimental::ENTupleInfo::kSummary
@ kSummary
ROOT::Experimental::RNTupleImtTaskScheduler
Definition: RNTuple.hxx:67
ROOT::Experimental::RCollectionNTuple::RCollectionNTuple
RCollectionNTuple(const RCollectionNTuple &)=delete
ROOT::Experimental::Detail::RNTupleMetrics
A collection of Counter objects with a name, a unit, and a description.
Definition: RNTupleMetrics.hxx:285
ROOT::Experimental::Detail::RPageStorage::RTaskScheduler
The interface of a task scheduler to schedule page (de)compression tasks.
Definition: RPageStorage.hxx:64
ROOT::Experimental::RCollectionNTuple::fOffset
ClusterSize_t fOffset
Definition: RNTuple.hxx:266
ROOT::Experimental::RNTupleReader::RIterator::value_type
NTupleSize_t value_type
Definition: RNTuple.hxx:117
ROOT::Experimental::RNTupleImtTaskScheduler::Reset
void Reset() final
Start a new set of tasks.
Definition: RNTuple.cxx:41
ROOT::Experimental::RNTupleWriter::fNEntries
NTupleSize_t fNEntries
Definition: RNTuple.hxx:221
ROOT::Experimental::RNTupleReader::GetEntryRange
RNTupleGlobalRange GetEntryRange()
Definition: RNTuple.hxx:179
ROOT::Experimental::RNTupleWriter::fModel
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSink.
Definition: RNTuple.hxx:218
ROOT::Experimental::RNTupleWriter::operator=
RNTupleWriter & operator=(const RNTupleWriter &)=delete
ROOT::Experimental::RCollectionNTuple::operator=
RCollectionNTuple & operator=(const RCollectionNTuple &)=delete
ROOT::Experimental::RNTupleReader::EnableMetrics
void EnableMetrics()
Definition: RNTuple.hxx:197
ROOT::Experimental::RCollectionNTuple::~RCollectionNTuple
~RCollectionNTuple()=default
r
ROOT::R::TRInterface & r
Definition: Object.C:4
ROOT::Experimental::RNTupleWriter::Fill
void Fill()
The simplest user interface if the default entry that comes with the ntuple model is used.
Definition: RNTuple.hxx:238
ROOT::Experimental::RNTupleWriter::fLastCommitted
NTupleSize_t fLastCommitted
Definition: RNTuple.hxx:220
ROOT::Experimental::ENTupleShowFormat
ENTupleShowFormat
Listing of the different entry output formats of RNTupleReader::Show()
Definition: RNTuple.hxx:59
output
static void output(int code)
Definition: gifencode.c:226
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:785
ROOT::Experimental::RNTupleReader::fModel
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
Definition: RNTuple.hxx:98
ROOT::Experimental::ENTupleShowFormat::kCompleteJSON
@ kCompleteJSON
ROOT::Experimental::RNTupleReader::GetNEntries
NTupleSize_t GetNEntries() const
Definition: RNTuple.hxx:151
ROOT::Experimental::RNTupleReader::LoadEntry
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
Definition: RNTuple.hxx:165
ROOT::Experimental::RNTupleImtTaskScheduler::Wait
void Wait() final
Blocks until all scheduled tasks finished.
Definition: RNTuple.cxx:53
ROOT::Experimental::RCollectionNTuple::Fill
void Fill(REntry *entry)
Definition: RNTuple.hxx:275
ROOT::Experimental::NTupleSize_t
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:55
ROOT::Experimental::RNTupleReadOptions
Common user-tunable settings for reading ntuples.
Definition: RNTupleOptions.hxx:71
ROOT::Experimental::ENTupleInfo::kMetrics
@ kMetrics
ROOT::Experimental::RNTupleReader::GetDescriptor
const RNTupleDescriptor & GetDescriptor() const
Definition: RNTuple.hxx:152
RConfig.hxx
ROOT::Experimental::RNTupleWriter::fSink
std::unique_ptr< Detail::RPageSink > fSink
Definition: RNTuple.hxx:216
ROOT::Experimental::Detail::RNTupleMetrics::Enable
void Enable()
Definition: RNTupleMetrics.cxx:77
ROOT::Experimental::RNTupleView
An RNTupleView provides read-only access to a single field of the ntuple.
Definition: RNTupleView.hxx:152
ROOT::Experimental::REntry
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition: REntry.hxx:42
ROOT::Experimental::RNTupleReader::RIterator::operator++
iterator operator++(int)
Definition: RNTuple.hxx:126
ROOT::Experimental::RNTupleReader::RIterator::pointer
NTupleSize_t * pointer
Definition: RNTuple.hxx:119
ROOT::Experimental::RNTupleReader::RIterator::iterator_category
std::forward_iterator_tag iterator_category
Definition: RNTuple.hxx:116
ROOT::Experimental::RNTupleWriter::fClusterSizeEntries
NTupleSize_t fClusterSizeEntries
Definition: RNTuple.hxx:219
RPageStorage.hxx
ROOT::Experimental::RNTupleReader::RIterator::operator==
bool operator==(const iterator &rh) const
Definition: RNTuple.hxx:130
ROOT::Experimental::RNTupleDescriptor
The on-storage meta-data of an ntuple.
Definition: RNTupleDescriptor.hxx:279
ROOT::Experimental::RNTupleReader::RIterator::RIterator
RIterator()=default
ROOT::Experimental::RCollectionNTuple::fDefaultEntry
std::unique_ptr< REntry > fDefaultEntry
Definition: RNTuple.hxx:267
ROOT::Experimental::RNTupleReader::RIterator::RIterator
RIterator(NTupleSize_t index)
Definition: RNTuple.hxx:123
RStringView.hxx
ROOT::Experimental::kInvalidNTupleIndex
constexpr NTupleSize_t kInvalidNTupleIndex
Definition: RNTupleUtil.hxx:56
ROOT::Experimental::RNTupleWriter
An RNTuple that gets filled with entries (data) and writes them to storage.
Definition: RNTuple.hxx:213
ROOT::Experimental::RNTupleViewCollection
A view for a collection, that can itself generate new ntuple views for its nested fields.
Definition: RNTupleView.hxx:210
ROOT::Experimental::RNTupleReader::RIterator::reference
NTupleSize_t & reference
Definition: RNTuple.hxx:120
ROOT::R::function
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:151
R__unlikely
#define R__unlikely(expr)
Definition: RConfig.hxx:604
ROOT::Experimental::RNTupleModel
The RNTupleModel encapsulates the schema of an ntuple.
Definition: RNTupleModel.hxx:46
ROOT::Experimental::RNTupleImtTaskScheduler::~RNTupleImtTaskScheduler
virtual ~RNTupleImtTaskScheduler()=default
ROOT::Experimental::RCollectionNTuple::GetOffsetPtr
ClusterSize_t * GetOffsetPtr()
Definition: RNTuple.hxx:282
what
static const char * what
Definition: stlLoader.cc:6
ROOT::Experimental::RNTupleReader::end
RIterator end()
Definition: RNTuple.hxx:195
RNTupleUtil.hxx
TFile
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
ROOT::Experimental::RNTupleReader::fSource
std::unique_ptr< Detail::RPageSource > fSource
Definition: RNTuple.hxx:96
ROOT::Experimental::RNTupleGlobalRange
Used to loop over indexes (entries or collections) between start and end.
Definition: RNTupleView.hxx:40
ROOT::Experimental::RClusterSize
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Definition: RNTupleUtil.hxx:58
ROOT::Experimental::RNTupleWriter::RNTupleWriter
RNTupleWriter(const RNTupleWriter &)=delete
ROOT::Experimental::RNTupleImtTaskScheduler::fTaskGroup
std::unique_ptr< TTaskGroup > fTaskGroup
Definition: RNTuple.hxx:69
ROOT::Experimental::RNTupleReader::fMetrics
Detail::RNTupleMetrics fMetrics
Definition: RNTuple.hxx:103
ROOT::Experimental::RNTupleReader::Clone
std::unique_ptr< RNTupleReader > Clone()
Definition: RNTuple.hxx:147
ROOT::Experimental::RNTupleReader::fDisplayReader
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show() and Scan().
Definition: RNTuple.hxx:102
ROOT::Experimental::RNTupleReader::RIterator::operator++
iterator & operator++()
Definition: RNTuple.hxx:127
ROOT::Experimental::ENTupleInfo::kStorageDetails
@ kStorageDetails
ROOT::Experimental::ENTupleShowFormat::kCurrentModelJSON
@ kCurrentModelJSON
file
Definition: file.py:1
ROOT::Experimental::RNTupleReader::RIterator::fIndex
NTupleSize_t fIndex
Definition: RNTuple.hxx:113
ROOT::Experimental::RNTupleReader::RIterator
Definition: RNTuple.hxx:111
ROOT::Experimental::RNTupleReader::RIterator::operator!=
bool operator!=(const iterator &rh) const
Definition: RNTuple.hxx:131
ROOT::Experimental::RNTupleReader::begin
RIterator begin()
Definition: RNTuple.hxx:194
ROOT::Experimental::RNTupleReader::fUnzipTasks
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if IMT is on Needs to be destructe...
Definition: RNTuple.hxx:94
ROOT::Experimental::operator*
REveVectorT< TT > operator*(const REveVectorT< TT > &a, TT b)
Definition: REveVector.hxx:218
ROOT::Experimental::RNTupleReader::RIterator::operator->
pointer operator->()
Definition: RNTuple.hxx:129
make_cnn_model.model
model
Definition: make_cnn_model.py:6
ROOT::Experimental::RNTupleReader::GetViewCollection
RNTupleViewCollection GetViewCollection(std::string_view fieldName)
Definition: RNTuple.hxx:189
ROOT::Experimental::RNTupleReader::GetMetrics
const Detail::RNTupleMetrics & GetMetrics() const
Definition: RNTuple.hxx:198
ROOT::Experimental::RNTupleImtTaskScheduler::AddTask
void AddTask(const std::function< void(void)> &taskFunc) final
Take a callable that represents a task.
Definition: RNTuple.cxx:47
ROOT::Experimental::RCollectionNTuple::Fill
void Fill()
Definition: RNTuple.hxx:274
ROOT::Experimental::RNTupleReader::RIterator::~RIterator
~RIterator()=default
RNTupleOptions.hxx
ROOT
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: EExecutionPolicy.hxx:4
ROOT::Experimental::RNTupleReader::RIterator::difference_type
NTupleSize_t difference_type
Definition: RNTuple.hxx:118
ROOT::Experimental::RNTupleReader
An RNTuple that is used to read data from storage.
Definition: RNTuple.hxx:90
RNTupleView.hxx