Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleModel.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleModel.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleModel
17#define ROOT7_RNTupleModel
18
19#include <ROOT/REntry.hxx>
20#include <ROOT/RError.hxx>
21#include <ROOT/RField.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <string_view>
24
25#include <cstdint>
26#include <functional>
27#include <memory>
28#include <string>
29#include <unordered_map>
30#include <unordered_set>
31#include <utility>
32
33namespace ROOT {
34namespace Experimental {
35
36class RNTupleCollectionWriter;
37class RNTupleModel;
38class RNTupleWriter;
39
40namespace Internal {
41class RPageSinkBuf;
42
43// clang-format off
44/**
45\class ROOT::Experimental::Internal::RNTupleModelChangeset
46\ingroup NTuple
47\brief The incremental changes to a `RNTupleModel`
48
49Represents a set of alterations to a `RNTupleModel` that happened after the model is used to initialize a `RPageSink`
50instance. This object can be used to communicate metadata updates to a `RPageSink`.
51You will not normally use this directly; see `RNTupleModel::RUpdater` instead.
52*/
53// clang-format on
56 /// Points to the fields in fModel that were added as part of an updater transaction
57 std::vector<RFieldBase *> fAddedFields;
58 /// Points to the projected fields in fModel that were added as part of an updater transaction
59 std::vector<RFieldBase *> fAddedProjectedFields;
60
62 bool IsEmpty() const { return fAddedFields.empty() && fAddedProjectedFields.empty(); }
63};
64
65/// Merge two RNTuple models. The resulting model will take the description from the left-hand model.
66/// When `rightFieldPrefix` is specified, the right-hand model will be stored in an untyped sub-collection, identified
67/// by the prefix. This way, a field from the right-hand model is represented as `<prefix>.<fieldname>`.
68/// When no prefix is specified, the fields from the right-hand model get added directly to the resulting model.
69///
70/// Note that both models must be frozen before merging.
71std::unique_ptr<RNTupleModel>
72MergeModels(const RNTupleModel &left, const RNTupleModel &right, std::string_view rightFieldPrefix = "");
73} // namespace Internal
74
75// clang-format off
76/**
77\class ROOT::Experimental::RNTupleModel
78\ingroup NTuple
78\brief The RNTupleModel encapsulates the schema of an ntuple.
80
81The ntuple model comprises a collection of hierarchically organized fields. From a model, "entries"
82can be extracted. For convenience, the model provides a default entry unless it is created as a "bare model".
83Models have a unique model identifier that facilitates checking whether entries are compatible with it
84(i.e.: have been extracted from that model).
85
86A model is subject to a state transition during its lifetime: it starts in a building state, in which fields can be
87added and modified. Once the schema is finalized, the model gets frozen. Only frozen models can create entries.
88*/
89// clang-format on
91 friend std::unique_ptr<RNTupleModel>
92 Internal::MergeModels(const RNTupleModel &left, const RNTupleModel &right, std::string_view rightFieldPrefix);
93
94public:
95 /// A wrapper over a field name and an optional description; used in `MakeField()` and `RUpdater::MakeField()`
98 NameWithDescription_t(const std::string &name) : fName(name) {}
99 NameWithDescription_t(std::string_view name) : fName(name) {}
100 NameWithDescription_t(std::string_view name, std::string_view descr) : fName(name), fDescription(descr) {}
101
102 std::string_view fName;
103 std::string_view fDescription = "";
104 };
105
106 /// Projected fields are fields whose columns are reused from existing fields. Projected fields are not attached
107 /// to the model's zero field. Only the real source fields are written to; projected fields are stored as meta-data
108 /// (header) information only. Only top-level projected fields are supported because otherwise the layout of types
109 /// could be altered in unexpected ways.
110 /// All projected fields and the source fields used to back them are kept in this class.
112 public:
113 /// The map keys are the projected target fields, the map values are the backing source fields
114 /// Note that sub fields are treated individually and independently of their parent field
115 using FieldMap_t = std::unordered_map<const RFieldBase *, const RFieldBase *>;
116
117 private:
118 explicit RProjectedFields(std::unique_ptr<RFieldZero> fieldZero) : fFieldZero(std::move(fieldZero)) {}
119 /// The projected fields are attached to this zero field
120 std::unique_ptr<RFieldZero> fFieldZero;
121 /// Maps the target projected fields attached to fFieldZero to their backing source fields from fModel
123 /// The model this set of projected fields belongs to
125
126 /// Asserts that the passed field is a valid target of the source field provided in the field map.
127 /// Checks the field without looking into sub fields.
129
130 public:
131 explicit RProjectedFields(const RNTupleModel *model) : fFieldZero(std::make_unique<RFieldZero>()), fModel(model)
132 {
133 }
138 ~RProjectedFields() = default;
139
140 /// The new model needs to be a clone of fModel
141 std::unique_ptr<RProjectedFields> Clone(const RNTupleModel *newModel) const;
142
143 RFieldZero *GetFieldZero() const { return fFieldZero.get(); }
144 const RFieldBase *GetSourceField(const RFieldBase *target) const;
145 /// Adds a new projected field. The field map needs to provide valid source fields of fModel for 'field'
146 /// and each of its sub fields.
147 RResult<void> Add(std::unique_ptr<RFieldBase> field, const FieldMap_t &fieldMap);
148 bool IsEmpty() const { return fFieldZero->begin() == fFieldZero->end(); }
149 };
150
151 /// A model is usually immutable after passing it to an `RNTupleWriter`. However, for the rare
152 /// cases that require changing the model after the fact, `RUpdater` provides limited support for
153 /// incremental updates, e.g. addition of new fields.
154 ///
155 /// See `RNTupleWriter::CreateModelUpdater()` for an example.
156 class RUpdater {
157 private:
160 std::uint64_t fNewModelId = 0; ///< The model ID after committing
161
162 public:
163 explicit RUpdater(RNTupleWriter &writer);
165 /// Begin a new set of alterations to the underlying model. As a side effect, all `REntry` instances related to
166 /// the model are invalidated.
167 void BeginUpdate();
168 /// Commit changes since the last call to `BeginUpdate()`. All the invalidated `REntry`s remain invalid.
169 /// `CreateEntry()` or `CreateBareEntry()` can be used to create an `REntry` that matches the new model.
170 /// Upon completion, `BeginUpdate()` can be called again to begin a new set of changes.
171 void CommitUpdate();
172
/// Creates a new field of type `T` through the model referenced by the open changeset and records the
/// resulting field in the changeset's list of added fields.
///
/// The call forwards `fieldNameDesc` and `args` to `fOpenChangeset.fModel.MakeField<T>()`. Afterwards the
/// new field is looked up by name while iterating over the model's zero field, and its address is appended
/// to `fOpenChangeset.fAddedFields`.
///
/// \param fieldNameDesc Name (and optional description) of the field to create
/// \param args Arguments forwarded unchanged to the model's `MakeField<T>()`
/// \return The shared pointer returned by the model's `MakeField<T>()`
173 template <typename T, typename... ArgsT>
174 std::shared_ptr<T> MakeField(const NameWithDescription_t &fieldNameDesc, ArgsT &&...args)
175 {
176 auto objPtr = fOpenChangeset.fModel.MakeField<T>(fieldNameDesc, std::forward<ArgsT>(args)...);
// MakeField() above only hands back the value pointer, so the field object itself has to be located
// by name in order to store a pointer to it in the changeset.
177 auto fieldZero = fOpenChangeset.fModel.fFieldZero.get();
178 auto it = std::find_if(fieldZero->begin(), fieldZero->end(),
179 [&](const auto &f) { return f.GetFieldName() == fieldNameDesc.fName; });
// NOTE(review): the lookup is expected to succeed because the model's MakeField() attaches the new
// field to its zero field; a miss is treated as an internal error.
180 R__ASSERT(it != fieldZero->end());
181 fOpenChangeset.fAddedFields.emplace_back(&(*it));
182 return objPtr;
183 }
184
185 void AddField(std::unique_ptr<RFieldBase> field);
186
188 AddProjectedField(std::unique_ptr<RFieldBase> field, std::function<std::string(const std::string &)> mapping);
189 };
190
191private:
192 /// Hierarchy of fields consisting of simple types and collections (sub trees)
193 std::unique_ptr<RFieldZero> fFieldZero;
194 /// Contains field values corresponding to the created top-level fields
195 std::unique_ptr<REntry> fDefaultEntry;
196 /// Keeps track of which field names are taken, including projected field names.
197 std::unordered_set<std::string> fFieldNames;
198 /// Free text set by the user
199 std::string fDescription;
200 /// The set of projected top-level fields
201 std::unique_ptr<RProjectedFields> fProjectedFields;
202 /// Every model has a unique ID to distinguish it from other models. Entries are linked to models via the ID.
203 /// Cloned models get a new model ID.
204 std::uint64_t fModelId = 0;
205 /// Changed by Freeze() / Unfreeze() and by the RUpdater.
206 bool fIsFrozen = false;
207
208 /// Checks that user-provided field names are valid in the context
209 /// of this NTuple model. Throws an RException for invalid names.
210 void EnsureValidFieldName(std::string_view fieldName);
211
212 /// Throws an RException if fIsFrozen is true
213 void EnsureNotFrozen() const;
214
215 /// Throws an RException if fDefaultEntry is nullptr
216 void EnsureNotBare() const;
217
218 /// The field name can be a top-level field or a nested field. Returns nullptr if the field is not in the model.
219 RFieldBase *FindField(std::string_view fieldName) const;
220
221 RNTupleModel(std::unique_ptr<RFieldZero> fieldZero);
222
223public:
224 RNTupleModel(const RNTupleModel&) = delete;
226 ~RNTupleModel() = default;
227
228 std::unique_ptr<RNTupleModel> Clone() const;
229 static std::unique_ptr<RNTupleModel> Create();
230 static std::unique_ptr<RNTupleModel> Create(std::unique_ptr<RFieldZero> fieldZero);
231 /// A bare model has no default entry
232 static std::unique_ptr<RNTupleModel> CreateBare();
233 static std::unique_ptr<RNTupleModel> CreateBare(std::unique_ptr<RFieldZero> fieldZero);
234
235 /// Creates a new field given a `name` or `{name, description}` pair and a
236 /// corresponding value that is managed by a shared pointer.
237 ///
238 /// **Example: create some fields and fill an %RNTuple**
239 /// ~~~ {.cpp}
240 /// #include <ROOT/RNTupleModel.hxx>
241 /// #include <ROOT/RNTupleWriter.hxx>
242 /// using ROOT::Experimental::RNTupleModel;
243 /// using ROOT::Experimental::RNTupleWriter;
244 ///
245 /// #include <vector>
246 ///
247 /// auto model = RNTupleModel::Create();
248 /// auto pt = model->MakeField<float>("pt");
249 /// auto vec = model->MakeField<std::vector<int>>("vec");
250 ///
251 /// // The RNTuple is written to disk when the RNTupleWriter goes out of scope
252 /// {
253 /// auto writer = RNTupleWriter::Recreate(std::move(model), "myNTuple", "myFile.root");
254 /// for (int i = 0; i < 100; i++) {
255 /// *pt = static_cast<float>(i);
256 /// *vec = {i, i+1, i+2};
257 /// writer->Fill();
258 /// }
259 /// }
260 /// ~~~
261 ///
262 /// **Example: create a field with an initial value**
263 /// ~~~ {.cpp}
264 /// #include <ROOT/RNTupleModel.hxx>
265 /// using ROOT::Experimental::RNTupleModel;
266 ///
267 /// auto model = RNTupleModel::Create();
268 /// // pt's initial value is 42.0
269 /// auto pt = model->MakeField<float>("pt", 42.0);
270 /// ~~~
271 /// **Example: create a field with a description**
272 /// ~~~ {.cpp}
273 /// #include <ROOT/RNTupleModel.hxx>
274 /// using ROOT::Experimental::RNTupleModel;
275 ///
276 /// auto model = RNTupleModel::Create();
277 /// auto hadronFlavour = model->MakeField<float>({
278 /// "hadronFlavour", "flavour from hadron ghost clustering"
279 /// });
280 /// ~~~
281 template <typename T, typename... ArgsT>
282 std::shared_ptr<T> MakeField(const NameWithDescription_t &fieldNameDesc, ArgsT &&...args)
283 {
285 EnsureValidFieldName(fieldNameDesc.fName);
286 auto field = std::make_unique<RField<T>>(fieldNameDesc.fName);
287 field->SetDescription(fieldNameDesc.fDescription);
288 std::shared_ptr<T> ptr;
289 if (fDefaultEntry)
290 ptr = fDefaultEntry->AddValue<T>(*field, std::forward<ArgsT>(args)...);
291 fFieldNames.insert(field->GetFieldName());
292 fFieldZero->Attach(std::move(field));
293 return ptr;
294 }
295
296 /// Adds a field whose type is not known at compile time. Thus there is no shared pointer returned.
297 ///
298 /// Throws an exception if the field is null.
299 void AddField(std::unique_ptr<RFieldBase> field);
300
301 /// Adds a top-level field based on existing fields. The mapping function is called with the qualified field names
302 /// of the provided field and the subfields. It should return the qualified field names used as a mapping source.
303 /// Projected fields can only be used for models used to write data.
305 AddProjectedField(std::unique_ptr<RFieldBase> field, std::function<std::string(const std::string &)> mapping);
307
308 void Freeze();
309 void Unfreeze();
310 bool IsFrozen() const { return fIsFrozen; }
311 std::uint64_t GetModelId() const { return fModelId; }
312
313 /// Ingests a model for a sub collection and attaches it to the current model
314 ///
315 /// Throws an exception if collectionModel is null.
316 std::shared_ptr<RNTupleCollectionWriter>
317 MakeCollection(std::string_view fieldName, std::unique_ptr<RNTupleModel> collectionModel);
318
319 std::unique_ptr<REntry> CreateEntry() const;
320 /// In a bare entry, all values point to nullptr. The resulting entry shall use BindValue() in order
321 /// to set memory addresses to be serialized / deserialized
322 std::unique_ptr<REntry> CreateBareEntry() const;
323 /// Creates a token to be used in REntry methods to address a top-level field
324 REntry::RFieldToken GetToken(std::string_view fieldName) const;
325 /// Calls the given field's CreateBulk() method. Throws an exception if no field with the given name exists.
326 RFieldBase::RBulk CreateBulk(std::string_view fieldName) const;
327
329 const REntry &GetDefaultEntry() const;
330
331 /// Non-const access to the root field is used to commit clusters during writing
332 /// and to set the on-disk field IDs when connecting a model to a page source or sink.
334 const RFieldZero &GetFieldZero() const { return *fFieldZero; }
335 const RFieldBase &GetField(std::string_view fieldName) const;
336
337 std::string GetDescription() const { return fDescription; }
338 void SetDescription(std::string_view description);
339};
340
341} // namespace Experimental
342} // namespace ROOT
343
344#endif
#define f(i)
Definition RSha256.hxx:104
#define R__ASSERT(e)
Definition TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t target
char name[80]
Definition TGX11.cxx:110
The field token identifies a top-level field in this entry.
Definition REntry.hxx:55
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:46
Similar to RValue but manages an array of consecutive values.
Definition RField.hxx:232
A field translates read and write calls from/to underlying columns to/from tree values.
Definition RField.hxx:96
The container field for an ntuple model, which itself has no physical representation.
Definition RField.hxx:719
Projected fields are fields whose columns are reused from existing fields.
FieldMap_t fFieldMap
Maps the target projected fields attached to fFieldZero to their backing source fields from fModel.
RProjectedFields & operator=(RProjectedFields &&)=default
RProjectedFields(std::unique_ptr< RFieldZero > fieldZero)
RResult< void > EnsureValidMapping(const RFieldBase *target, const FieldMap_t &fieldMap)
Asserts that the passed field is a valid target of the source field provided in the field map.
const RNTupleModel * fModel
The model this set of projected fields belongs to.
std::unordered_map< const RFieldBase *, const RFieldBase * > FieldMap_t
The map keys are the projected target fields, the map values are the backing source fields Note that ...
std::unique_ptr< RFieldZero > fFieldZero
The projected fields are attached to this zero field.
RProjectedFields(const RProjectedFields &)=delete
const RFieldBase * GetSourceField(const RFieldBase *target) const
RProjectedFields & operator=(const RProjectedFields &)=delete
RResult< void > Add(std::unique_ptr< RFieldBase > field, const FieldMap_t &fieldMap)
Adds a new projected field.
A model is usually immutable after passing it to an RNTupleWriter.
Internal::RNTupleModelChangeset fOpenChangeset
void CommitUpdate()
Commit changes since the last call to BeginUpdate().
void BeginUpdate()
Begin a new set of alterations to the underlying model.
std::uint64_t fNewModelId
The model ID after committing.
std::shared_ptr< T > MakeField(const NameWithDescription_t &fieldNameDesc, ArgsT &&...args)
RResult< void > AddProjectedField(std::unique_ptr< RFieldBase > field, std::function< std::string(const std::string &)> mapping)
void AddField(std::unique_ptr< RFieldBase > field)
The RNTupleModel encapsulates the schema of an ntuple.
std::unordered_set< std::string > fFieldNames
Keeps track of which field names are taken, including projected field names.
std::string fDescription
Free text set by the user.
void EnsureValidFieldName(std::string_view fieldName)
Checks that user-provided field names are valid in the context of this NTuple model.
std::uint64_t fModelId
Every model has a unique ID to distinguish it from other models.
std::uint64_t GetModelId() const
RNTupleModel(const RNTupleModel &)=delete
REntry::RFieldToken GetToken(std::string_view fieldName) const
Creates a token to be used in REntry methods to address a top-level field.
RResult< void > AddProjectedField(std::unique_ptr< RFieldBase > field, std::function< std::string(const std::string &)> mapping)
Adds a top-level field based on existing fields.
void EnsureNotBare() const
Throws an RException if fDefaultEntry is nullptr.
std::unique_ptr< RNTupleModel > Clone() const
void EnsureNotFrozen() const
Throws an RException if fIsFrozen is true.
std::shared_ptr< T > MakeField(const NameWithDescription_t &fieldNameDesc, ArgsT &&...args)
Creates a new field given a name or {name, description} pair and a corresponding value that is manage...
std::shared_ptr< RNTupleCollectionWriter > MakeCollection(std::string_view fieldName, std::unique_ptr< RNTupleModel > collectionModel)
Ingests a model for a sub collection and attaches it to the current model.
const RFieldBase & GetField(std::string_view fieldName) const
std::unique_ptr< REntry > CreateBareEntry() const
In a bare entry, all values point to nullptr.
std::unique_ptr< REntry > CreateEntry() const
RFieldBase::RBulk CreateBulk(std::string_view fieldName) const
Calls the given field's CreateBulk() method. Throws an exception if no field with the given name exis...
static std::unique_ptr< RNTupleModel > Create()
std::unique_ptr< RProjectedFields > fProjectedFields
The set of projected top-level fields.
const RFieldZero & GetFieldZero() const
void SetDescription(std::string_view description)
std::unique_ptr< REntry > fDefaultEntry
Contains field values corresponding to the created top-level fields.
RFieldBase * FindField(std::string_view fieldName) const
The field name can be a top-level field or a nested field. Returns nullptr if the field is not in the...
static std::unique_ptr< RNTupleModel > CreateBare()
A bare model has no default entry.
const RProjectedFields & GetProjectedFields() const
void AddField(std::unique_ptr< RFieldBase > field)
Adds a field whose type is not known at compile time.
RFieldZero & GetFieldZero()
Non-const access to the root field is used to commit clusters during writing and to set the on-disk f...
RNTupleModel & operator=(const RNTupleModel &)=delete
bool fIsFrozen
Changed by Freeze() / Unfreeze() and by the RUpdater.
std::unique_ptr< RFieldZero > fFieldZero
Hierarchy of fields consisting of simple types and collections (sub trees)
An RNTuple that gets filled with entries (data) and writes them to storage.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
std::unique_ptr< RNTupleModel > MergeModels(const RNTupleModel &left, const RNTupleModel &right, std::string_view rightFieldPrefix="")
Merge two RNTuple models.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The incremental changes to a RNTupleModel
std::vector< RFieldBase * > fAddedProjectedFields
Points to the projected fields in fModel that were added as part of an updater transaction.
std::vector< RFieldBase * > fAddedFields
Points to the fields in fModel that were added as part of an updater transaction.
A wrapper over a field name and an optional description; used in MakeField() and RUpdater::MakeField()
NameWithDescription_t(std::string_view name, std::string_view descr)