Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTuple.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTuple.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTuple
17#define ROOT7_RNTuple
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
21#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RNTupleView.hxx>
25#include <ROOT/RPageStorage.hxx>
26#include <ROOT/RStringView.hxx>
27
28#include <iterator>
29#include <memory>
30#include <sstream>
31#include <utility>
32
33class TFile;
34
35namespace ROOT {
36namespace Experimental {
37
38class REntry;
39class RNTupleModel;
40
41namespace Detail {
42class RPageSink;
43class RPageSource;
44}
45
46
47/**
48 * Listing of the different options that can be printed by RNTupleReader::PrintInfo()
49 */
50enum class ENTupleInfo {
51 kSummary, // The ntuple name, description, number of entries
52 kStorageDetails, // size on storage, page sizes, compression factor, etc.
53 kMetrics, // internal performance counters, requires that EnableMetrics() was called
54};
55
56/**
57 * Listing of the different entry output formats of RNTupleReader::Show()
58 */
60 kCurrentModelJSON, // prints a single entry/row with the current active model in JSON format.
61 kCompleteJSON, // prints a single entry/row with all the fields in JSON format.
62};
63
64
65#ifdef R__USE_IMT
66class TTaskGroup;
68private:
69 std::unique_ptr<TTaskGroup> fTaskGroup;
70public:
71 virtual ~RNTupleImtTaskScheduler() = default;
72 void Reset() final;
73 void AddTask(const std::function<void(void)> &taskFunc) final;
74 void Wait() final;
75};
76#endif
77
78// clang-format off
79/**
80\class ROOT::Experimental::RNTupleReader
81\ingroup NTuple
82\brief An RNTuple that is used to read data from storage
83
84An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
85or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
86only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
87Individual fields can be read as well by instantiating a tree view.
88*/
89// clang-format on
91private:
92 /// Set as the page source's scheduler for parallel page decompression if IMT is on
93 /// Needs to be destructed after the page source is destructed (and thus be declared before)
94 std::unique_ptr<Detail::RPageStorage::RTaskScheduler> fUnzipTasks;
95
96 std::unique_ptr<Detail::RPageSource> fSource;
97 /// Needs to be destructed before fSource
98 std::unique_ptr<RNTupleModel> fModel;
99 /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields
100 /// from the full model even if the analysis code uses only a subset of fields. The display reader
101 /// is a clone of the original reader.
102 std::unique_ptr<RNTupleReader> fDisplayReader;
104
105 void ConnectModel(const RNTupleModel &model);
106 RNTupleReader *GetDisplayReader();
107 void InitPageSource();
108
109public:
110 // Browse through the entries
111 class RIterator {
112 private:
114 public:
116 using iterator_category = std::forward_iterator_tag;
121
122 RIterator() = default;
123 explicit RIterator(NTupleSize_t index) : fIndex(index) {}
124 ~RIterator() = default;
125
126 iterator operator++(int) /* postfix */ { auto r = *this; fIndex++; return r; }
127 iterator& operator++() /* prefix */ { ++fIndex; return *this; }
128 reference operator* () { return fIndex; }
129 pointer operator->() { return &fIndex; }
130 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
131 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
132 };
133
134
135 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model,
136 std::string_view ntupleName,
137 std::string_view storage,
138 const RNTupleReadOptions &options = RNTupleReadOptions());
139 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName,
140 std::string_view storage,
141 const RNTupleReadOptions &options = RNTupleReadOptions());
142
143 /// The user imposes an ntuple model, which must be compatible with the model found in the data on storage
144 RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSource> source);
145 /// The model is generated from the ntuple metadata on storage
146 explicit RNTupleReader(std::unique_ptr<Detail::RPageSource> source);
147 std::unique_ptr<RNTupleReader> Clone() { return std::make_unique<RNTupleReader>(fSource->Clone()); }
149
150 RNTupleModel *GetModel();
151 NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
152 const RNTupleDescriptor &GetDescriptor() const { return fSource->GetDescriptor(); }
153
154 /// Prints a detailed summary of the ntuple, including a list of fields.
155 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout);
156
157 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
158 /// prints the output in JSON format.
159 /// Uses the visitor pattern to traverse through each field of the given entry.
160 void Show(NTupleSize_t index, const ENTupleShowFormat format = ENTupleShowFormat::kCurrentModelJSON,
161 std::ostream &output = std::cout);
162
163 /// Analogous to Fill(), fills the default entry of the model. Returns nothing (the return type is void).
164 /// On I/O errors, raises an exception.
165 void LoadEntry(NTupleSize_t index) { LoadEntry(index, *fModel->GetDefaultEntry()); }
166 /// Fills a user provided entry after checking that the entry has been instantiated from the ntuple model
167 void LoadEntry(NTupleSize_t index, REntry &entry) {
168 // TODO(jblomer): can be templated depending on the factory method / constructor
169 if (R__unlikely(!fModel)) {
170 fModel = fSource->GetDescriptor().GenerateModel();
171 ConnectModel(*fModel);
172 }
173
174 for (auto& value : entry) {
175 value.GetField()->Read(index, &value);
176 }
177 }
178
180
181 /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
182 /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
183 /// field of a collection itself, like GetView<NTupleSize_t>("particle")
184 template <typename T>
185 RNTupleView<T> GetView(std::string_view fieldName) {
186 auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName);
187 return RNTupleView<T>(fieldId, fSource.get());
188 }
189 RNTupleViewCollection GetViewCollection(std::string_view fieldName) {
190 auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName);
191 return RNTupleViewCollection(fieldId, fSource.get());
192 }
193
194 RIterator begin() { return RIterator(0); }
195 RIterator end() { return RIterator(GetNEntries()); }
196
197 void EnableMetrics() { fMetrics.Enable(); }
198 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
199};
200
201// clang-format off
202/**
203\class ROOT::Experimental::RNTupleWriter
204\ingroup NTuple
205\brief An RNTuple that gets filled with entries (data) and writes them to storage
206
207An output ntuple can be filled with entries. The caller has to make sure that the data that gets filled into an ntuple
208is not modified for the time of the Fill() call. The fill call serializes the C++ object into the column format and
209writes data into the corresponding column page buffers. Writing of the buffers to storage is deferred and can be
210triggered by Flush() or by destructing the ntuple. On I/O errors, an exception is thrown.
211*/
212// clang-format on
214private:
215 static constexpr NTupleSize_t kDefaultClusterSizeEntries = 64000;
216 std::unique_ptr<Detail::RPageSink> fSink;
217 /// Needs to be destructed before fSink
218 std::unique_ptr<RNTupleModel> fModel;
222
223public:
224 static std::unique_ptr<RNTupleWriter> Recreate(std::unique_ptr<RNTupleModel> model,
225 std::string_view ntupleName,
226 std::string_view storage,
227 const RNTupleWriteOptions &options = RNTupleWriteOptions());
228 static std::unique_ptr<RNTupleWriter> Append(std::unique_ptr<RNTupleModel> model,
229 std::string_view ntupleName,
230 TFile &file,
231 const RNTupleWriteOptions &options = RNTupleWriteOptions());
232 RNTupleWriter(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSink> sink);
233 RNTupleWriter(const RNTupleWriter&) = delete;
236
237 /// The simplest user interface if the default entry that comes with the ntuple model is used
238 void Fill() { Fill(*fModel->GetDefaultEntry()); }
239 /// Multiple entries can have been instantiated from the ntuple model. This method will perform
240 /// a light check whether the entry comes from the ntuple's own model
241 void Fill(REntry &entry) {
242 for (auto& value : entry) {
243 value.GetField()->Append(value);
244 }
245 fNEntries++;
246 if ((fNEntries % fClusterSizeEntries) == 0)
247 CommitCluster();
248 }
249 /// Ensure that the data from all Fill calls seen so far has been written to storage
250 void CommitCluster();
251};
252
253// clang-format off
254/**
255\class ROOT::Experimental::RCollectionNTuple
256\ingroup NTuple
257\brief A virtual ntuple for collections that can be used to some extent like a real ntuple
258*
259* This class is between a field and an ntuple. It carries the offset column for the collection and the default entry
260* taken from the collection model. It does not, however, have a tree model because the collection model has been merged
261* into the larger ntuple model.
262*/
263// clang-format on
265private:
267 std::unique_ptr<REntry> fDefaultEntry;
268public:
269 explicit RCollectionNTuple(std::unique_ptr<REntry> defaultEntry);
273
274 void Fill() { Fill(fDefaultEntry.get()); }
275 void Fill(REntry *entry) {
276 for (auto& treeValue : *entry) {
277 treeValue.GetField()->Append(treeValue);
278 }
279 fOffset++;
280 }
281
282 ClusterSize_t* GetOffsetPtr() { return &fOffset; }
283};
284
285} // namespace Experimental
286} // namespace ROOT
287
288#endif
ROOT::R::TRInterface & r
Definition Object.C:4
#define R__unlikely(expr)
Definition RConfig.hxx:608
TTime operator*(const TTime &t1, const TTime &t2)
Definition TTime.h:85
A collection of Counter objects with a name, a unit, and a description.
The interface of a task scheduler to schedule page (de)compression tasks.
A virtual ntuple for collections that can be used to some extent like a real ntuple.
Definition RNTuple.hxx:264
RCollectionNTuple(const RCollectionNTuple &)=delete
std::unique_ptr< REntry > fDefaultEntry
Definition RNTuple.hxx:267
RCollectionNTuple & operator=(const RCollectionNTuple &)=delete
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:42
The on-storage meta-data of an ntuple.
Used to loop over indexes (entries or collections) between start and end.
void Reset() final
Start a new set of tasks.
Definition RNTuple.cxx:41
std::unique_ptr< TTaskGroup > fTaskGroup
Definition RNTuple.hxx:69
void AddTask(const std::function< void(void)> &taskFunc) final
Take a callable that represents a task.
Definition RNTuple.cxx:47
void Wait() final
Blocks until all scheduled tasks finished.
Definition RNTuple.cxx:53
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
bool operator!=(const iterator &rh) const
Definition RNTuple.hxx:131
bool operator==(const iterator &rh) const
Definition RNTuple.hxx:130
std::forward_iterator_tag iterator_category
Definition RNTuple.hxx:116
An RNTuple that is used to read data from storage.
Definition RNTuple.hxx:90
std::unique_ptr< RNTupleReader > Clone()
Definition RNTuple.hxx:147
Detail::RNTupleMetrics fMetrics
Definition RNTuple.hxx:103
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if IMT is on Needs to be destructe...
Definition RNTuple.hxx:94
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show() and Scan().
Definition RNTuple.hxx:102
const Detail::RNTupleMetrics & GetMetrics() const
Definition RNTuple.hxx:198
const RNTupleDescriptor & GetDescriptor() const
Definition RNTuple.hxx:152
RNTupleView< T > GetView(std::string_view fieldName)
Provides access to an individual field that can contain either a scalar value or a collection,...
Definition RNTuple.hxx:185
NTupleSize_t GetNEntries() const
Definition RNTuple.hxx:151
std::unique_ptr< Detail::RPageSource > fSource
Definition RNTuple.hxx:96
RNTupleGlobalRange GetEntryRange()
Definition RNTuple.hxx:179
RNTupleViewCollection GetViewCollection(std::string_view fieldName)
Definition RNTuple.hxx:189
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
Definition RNTuple.hxx:165
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
Definition RNTuple.hxx:98
void LoadEntry(NTupleSize_t index, REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the ntuple model...
Definition RNTuple.hxx:167
A view for a collection, that can itself generate new ntuple views for its nested fields.
An RNTupleView provides read-only access to a single field of the ntuple.
Common user-tunable settings for storing ntuples.
An RNTuple that gets filled with entries (data) and writes them to storage.
Definition RNTuple.hxx:213
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSink.
Definition RNTuple.hxx:218
RNTupleWriter(const RNTupleWriter &)=delete
RNTupleWriter & operator=(const RNTupleWriter &)=delete
std::unique_ptr< Detail::RPageSink > fSink
Definition RNTuple.hxx:216
void Fill()
The simplest user interface if the default entry that comes with the ntuple model is used.
Definition RNTuple.hxx:238
void Fill(REntry &entry)
Multiple entries can have been instantiated from the ntuple model.
Definition RNTuple.hxx:241
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:54
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
Definition RNTuple.hxx:50
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr NTupleSize_t kInvalidNTupleIndex
ENTupleShowFormat
Listing of the different entry output formats of RNTupleReader::Show()
Definition RNTuple.hxx:59
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition file.py:1
static const char * what
Definition stlLoader.cc:6
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
static void output(int code)
Definition gifencode.c:226