Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleProcessor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleProcessor.hxx
2/// \ingroup NTuple
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2024-03-26
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT_RNTupleProcessor
17#define ROOT_RNTupleProcessor
18
19#include <ROOT/REntry.hxx>
20#include <ROOT/RError.hxx>
23#include <ROOT/RNTupleModel.hxx>
24#include <ROOT/RNTupleTypes.hxx>
26#include <ROOT/RPageStorage.hxx>
27
28#include <memory>
29#include <string>
30#include <string_view>
31#include <vector>
32
33namespace ROOT {
34namespace Experimental {
35
36namespace Internal {
37struct RNTupleProcessorEntryLoader;
38} // namespace Internal
39
40// clang-format off
41/**
42\class ROOT::Experimental::RNTupleOpenSpec
43\ingroup NTuple
44\brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
45
46An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the
47TDirectory (or any of its subclasses) that contains the RNTuple.
48
49Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python.
50*/
51// clang-format on
53 friend class RNTupleProcessor;
56
57private:
58 std::string fNTupleName;
59 std::variant<std::string, TDirectory *> fStorage;
60
61public:
/// \brief Create a specification for the RNTuple named `n` that is contained in the TDirectory `s`.
62 RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {}
/// \brief Create a specification for the RNTuple named `n`, with `s` being the path to the ROOT file containing it.
63 RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {}
64
65 std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const;
66};
67
68// clang-format off
69/**
70\class ROOT::Experimental::RNTupleProcessorOptionalPtr<T>
71\ingroup NTuple
72\brief The RNTupleProcessorOptionalPtr provides access to values from fields present in an RNTupleProcessor, with support
73and checks for missing values.
74*/
75// clang-format on
76template <typename T>
78 friend class RNTupleProcessor;
79
80private:
83
89
90public:
91 /////////////////////////////////////////////////////////////////////////////
92 /// \brief Check if the pointer currently holds a valid value.
93 bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); }
94
95 /////////////////////////////////////////////////////////////////////////////
96 /// \brief Get a shared pointer to the field value managed by the processor's entry.
97 ///
98 /// \return A `std::shared_ptr<T>` if the field is valid in the current entry, or a `nullptr` otherwise.
99 std::shared_ptr<T> GetPtr() const
100 {
101 if (fProcessorEntry->IsValidField(fFieldIndex)) {
102 const auto &value = fProcessorEntry->GetValue(fFieldIndex);
103 return value.template GetPtr<T>();
104 }
105
106 return nullptr;
107 }
108
109 /////////////////////////////////////////////////////////////////////////////
110 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
111 ///
112 /// \return A `T*` if the field is valid in the current entry, or a `nullptr` otherwise.
///
/// \note The raw pointer is taken from the temporary `std::shared_ptr` returned by GetPtr(), which is released when
/// this call returns. The pointer therefore does not keep the value alive by itself.
113 T *GetRawPtr() const { return GetPtr().get(); }
114
115 /////////////////////////////////////////////////////////////////////////////
116 /// \brief Bind the value to `valuePtr`.
117 ///
118 /// \param[in] valuePtr Pointer to bind the value to.
119 ///
120 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
121 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
122 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
123 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
124 /// its data through this interface, to ensure that only valid data can be read.
125 void BindRawPtr(T *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); }
126
127 /////////////////////////////////////////////////////////////////////////////
128 /// \brief Get a reference to the field value managed by the processor's entry.
129 ///
130 /// \return A const reference to the value pointed to by GetPtr().
/// \throws RException If GetPtr() returns a `nullptr`, i.e. the field is invalid in the processor's current entry.
131 const T &operator*() const
132 {
133 if (auto ptr = GetPtr())
134 return *ptr;
135 else
// The error message names the field, which is looked up in the entry through its index.
136 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
137 "\" because it has no value for the current entry"));
138 }
139
140 /////////////////////////////////////////////////////////////////////////////
141 /// \brief Access the field value managed by the processor's entry.
142 ///
143 /// \return A non-owning const pointer to the value, taken from the pointer returned by GetPtr().
/// \throws RException If GetPtr() returns a `nullptr`, i.e. the field is invalid in the processor's current entry.
144 const T *operator->() const
145 {
146 if (auto ptr = GetPtr())
147 return ptr.get();
148 else
// The error message names the field, which is looked up in the entry through its index.
149 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
150 "\" because it has no value for the current entry"));
151 }
152};
153
154// clang-format off
155/**
156\class ROOT::Experimental::RNTupleProcessorOptionalPtr<void>
157\ingroup NTuple
158\brief Specialization of RNTupleProcessorOptionalPtr<T> for `void`-type pointers.
159*/
160// clang-format on
161template <>
163 friend class RNTupleProcessor;
164
165private:
168
174
175public:
176 /////////////////////////////////////////////////////////////////////////////
177 /// \brief Check if the pointer currently holds a valid value.
178 bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); }
179
180 /////////////////////////////////////////////////////////////////////////////
181 /// \brief Get the pointer to the field value managed by the processor's entry.
182 ///
183 /// \return A `std::shared_ptr<void>` if the field is valid in the current entry, or a `nullptr` otherwise.
184 std::shared_ptr<void> GetPtr() const
185 {
186 if (fProcessorEntry->IsValidField(fFieldIndex)) {
187 const auto &value = fProcessorEntry->GetValue(fFieldIndex);
188 return value.template GetPtr<void>();
189 }
190
191 return nullptr;
192 }
193
194 /////////////////////////////////////////////////////////////////////////////
195 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
196 ///
197 /// \return A `void*` if the field is valid in the current entry, or a `nullptr` otherwise.
///
/// \note The raw pointer is taken from the temporary `std::shared_ptr` returned by GetPtr(), which is released when
/// this call returns. The pointer therefore does not keep the value alive by itself.
198 void *GetRawPtr() const { return GetPtr().get(); }
199
200 /////////////////////////////////////////////////////////////////////////////
201 /// \brief Bind the value to `valuePtr`.
202 ///
203 /// \param[in] valuePtr Pointer to bind the value to.
204 ///
205 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
206 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
207 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
208 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
209 /// its data through this interface, to ensure that only valid data can be read.
210 void BindRawPtr(void *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); }
211};
212
213// clang-format off
214/**
215\class ROOT::Experimental::RNTupleProcessor
216\ingroup NTuple
217\brief Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combined RNTuples.
218
219Example usage (see ntpl012_processor_chain.C and ntpl015_processor_join.C for bigger examples):
220
221~~~{.cpp}
222#include <ROOT/RNTupleProcessor.hxx>
223using ROOT::Experimental::RNTupleProcessor;
224using ROOT::Experimental::RNTupleOpenSpec;
225
226std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
227auto processor = RNTupleProcessor::CreateChain(ntuples);
228
229auto pt = processor->RequestField<float>("pt");
230
231for (const auto idx : *processor) {
232 std::cout << "event = " << idx << ", pt = " << *pt << std::endl;
233}
234~~~
235
236An RNTupleProcessor is created either:
2371. By providing one or more RNTupleOpenSpecs, each of which contains the name and storage location of a single RNTuple;
2382. By providing a previously created RNTupleProcessor.
239
240The RNTupleProcessor provides an iterator which gives access to the index of the current *global* entry of the
241processor, i.e. taking into account previously processed RNTuples.
242
243Because the schemas of the RNTuples that are part of an RNTupleProcessor may not necessarily be identical, or because
244it can occur that entries are only partially complete in a join-based processor, field values may be marked as
245"invalid", at which point their data should not be read. This is handled by the RNTupleProcessorOptionalPtr
246that is returned by RequestField().
247*/
248// clang-format on
254
255protected:
256 std::string fProcessorName;
257 std::shared_ptr<Internal::RNTupleProcessorEntry> fEntry = nullptr;
258 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fFieldIdxs;
259
260 /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public
261 /// interface.
263
264 ROOT::NTupleSize_t fNEntriesProcessed = 0; ///< Total number of entries processed so far
265 ROOT::NTupleSize_t fCurrentEntryNumber = 0; ///< Current processor entry number
266 std::size_t fCurrentProcessorNumber = 0; ///< Number of the currently open inner processor
267
268 /////////////////////////////////////////////////////////////////////////////
269 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
270 virtual void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry) = 0;
271
272 /////////////////////////////////////////////////////////////////////////////
273 /// \brief Check if the processor has already been initialized, i.e. whether `fEntry` has been set.
274 bool IsInitialized() const { return fEntry != nullptr; }
275
276 /////////////////////////////////////////////////////////////////////////////
277 /// \brief Connect fields to the page source of the processor's underlying RNTuple(s).
278 ///
279 /// \param[in] fieldIdxs Indices of the fields to connect.
280 /// \param[in] provenance Provenance of the processor.
281 /// \param[in] updateFields Whether the fields in the entry need to be updated, because the current underlying
282 /// RNTuple source changed.
283 virtual void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
284 const Internal::RNTupleProcessorProvenance &provenance, bool updateFields) = 0;
285
286 /////////////////////////////////////////////////////////////////////////////
287 /// \brief Load the entry identified by the provided entry number.
288 ///
289 /// \param[in] entryNumber Entry number to load
290 ///
291 /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise.
293
294 /////////////////////////////////////////////////////////////////////////////
295 /// \brief Get the total number of entries in this processor
297
298 /////////////////////////////////////////////////////////////////////////////
299 /// \brief Check if a field exists on-disk and can be read by the processor.
300 ///
301 /// \param[in] fieldName Name of the field to check.
302 virtual bool CanReadFieldFromDisk(std::string_view fieldName) = 0;
303
304 /////////////////////////////////////////////////////////////////////////////
305 /// \brief Add a field to the entry.
306 ///
307 ///
308 /// \param[in] fieldName Name of the field to add.
309 /// \param[in] typeName Type of the field to add.
310 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
311 /// created.
312 /// \param[in] provenance Provenance of the processor.
313 ///
314 /// \return The index of the newly added field in the entry.
315 ///
316 /// In case the field was already present in the entry, the index of the existing field is returned.
318 AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr,
319 const Internal::RNTupleProcessorProvenance &provenance) = 0;
320
321 /////////////////////////////////////////////////////////////////////////////
322 /// \brief Add the entry mappings for this processor to the provided join table.
323 ///
324 /// \param[in] joinTable the join table to map the entries to.
325 /// \param[in] entryOffset In case the entry mapping is added from a chain, the offset of the entry indexes to use
326 /// with respect to the processor's position in the chain.
327 virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) = 0;
328
329 /////////////////////////////////////////////////////////////////////////////
330 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
331 ///
332 /// \param[in,out] output Output stream to print to.
333 virtual void PrintStructureImpl(std::ostream &output) const = 0;
334
335 /////////////////////////////////////////////////////////////////////////////
336 /// \brief Create a new base RNTupleProcessor.
337 ///
338 /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for
339 /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary
340 /// RNTuple for RNTupleJoinProcessor.
341 RNTupleProcessor(std::string_view processorName) : fProcessorName(processorName) {}
342
343public:
348 virtual ~RNTupleProcessor() = default;
349
350 /////////////////////////////////////////////////////////////////////////////
351 /// \brief Get the total number of entries processed so far.
353
354 /////////////////////////////////////////////////////////////////////////////
355 /// \brief Get the entry number that is currently being processed.
357
358 /////////////////////////////////////////////////////////////////////////////
359 /// \brief Get the number of the inner processor currently being read.
360 ///
361 /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned.
363
364 /////////////////////////////////////////////////////////////////////////////
365 /// \brief Get the name of the processor.
366 ///
367 /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying
368 /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the
369 /// primary processor for RNTupleJoinProcessor.
370 const std::string &GetProcessorName() const { return fProcessorName; }
371
372 /////////////////////////////////////////////////////////////////////////////
373 /// \brief Request access to a field for reading during processing.
374 ///
375 /// \tparam T Type of the requested field.
376 ///
377 /// \param[in] fieldName Name of the requested field.
378 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
379 /// created.
380 ///
381 /// \return An RNTupleProcessorOptionalPtr, which provides access to the field's value.
382 ///
383 /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for
384 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
385 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
386 /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through
387 /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read.
388 template <typename T>
389 RNTupleProcessorOptionalPtr<T> RequestField(const std::string &fieldName, void *valuePtr = nullptr)
390 {
392 std::string typeName{};
393 if constexpr (!std::is_void_v<T>) {
394 typeName = ROOT::Internal::GetRenormalizedTypeName(typeid(T));
395 }
396 auto fieldIdx = AddFieldToEntry(fieldName, typeName, valuePtr, Internal::RNTupleProcessorProvenance());
397 return RNTupleProcessorOptionalPtr<T>(fEntry.get(), fieldIdx);
398 }
399
400 /////////////////////////////////////////////////////////////////////////////
401 /// \brief Print a graphical representation of the processor composition.
402 ///
403 /// \param[in,out] output Stream to print to (default is stdout).
404 ///
/// This method only forwards to the `const` PrintStructureImpl(), so it is `const` itself and can be called on a
/// `const` processor.
///
405 /// ### Example:
406 /// The structure of a processor representing a join between a single primary RNTuple and a chain of two auxiliary
407 /// RNTuples will be printed as follows:
408 /// ~~~
409 /// +-----------------------------+ +-----------------------------+
410 /// | ntuple | | ntuple_aux |
411 /// | ntuple.root | | ntuple_aux1.root |
412 /// +-----------------------------+ +-----------------------------+
413 /// +-----------------------------+
414 /// | ntuple_aux |
415 /// | ntuple_aux2.root |
416 /// +-----------------------------+
417 /// ~~~
418 void PrintStructure(std::ostream &output = std::cout) const { PrintStructureImpl(output); }
419
420 // clang-format off
421 /**
422 \class ROOT::Experimental::RNTupleProcessor::RIterator
423 \ingroup NTuple
424 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
425 */
426 // clang-format on
427 class RIterator {
428 private:
431
432 public:
433 using iterator_category = std::input_iterator_tag;
436 using difference_type = std::ptrdiff_t;
439
441 : fProcessor(processor), fCurrentEntryNumber(entryNumber)
442 {
443 if (!fProcessor.fEntry) {
444 fCurrentEntryNumber = ROOT::kInvalidNTupleIndex;
445 }
446 // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already
447 // know there is nothing to load.
449 fProcessor.Connect(fProcessor.fEntry->GetFieldIndices(), Internal::RNTupleProcessorProvenance(),
450 /*updateFields=*/false);
452 }
453 }
454
456 {
458 return *this;
459 }
460
462 {
463 auto obj = *this;
464 ++(*this);
465 return obj;
466 }
467
469
470 friend bool operator!=(const iterator &lh, const iterator &rh)
471 {
473 }
474 friend bool operator==(const iterator &lh, const iterator &rh)
475 {
477 }
478 };
479
480 RIterator begin() { return RIterator(*this, 0); }
482
483 /////////////////////////////////////////////////////////////////////////////
484 /// \brief Create an RNTupleProcessor for a single RNTuple.
485 ///
486 /// \param[in] ntuple The name and storage location of the RNTuple to process.
487 /// \param[in] processorName The name to give to the processor. If empty, the name of the input RNTuple is used.
488 ///
489 /// \return A pointer to the newly created RNTupleProcessor.
490 static std::unique_ptr<RNTupleProcessor> Create(RNTupleOpenSpec ntuple, std::string_view processorName = "");
491
492 /////////////////////////////////////////////////////////////////////////////
493 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
494 ///
495 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
496 /// \param[in] processorName The name to give to the processor. If empty, the name of the first RNTuple is used.
497 ///
498 /// \return A pointer to the newly created RNTupleProcessor.
499 static std::unique_ptr<RNTupleProcessor>
500 CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::string_view processorName = "");
501
502 /////////////////////////////////////////////////////////////////////////////
503 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
504 ///
505 /// \param[in] innerProcessors A list with the processors to chain.
506 /// \param[in] processorName The name to give to the processor. If empty, the name of the first inner processor is
507 /// used.
508 ///
509 /// \return A pointer to the newly created RNTupleProcessor.
510 static std::unique_ptr<RNTupleProcessor>
511 CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors, std::string_view processorName = "");
512
513 /////////////////////////////////////////////////////////////////////////////
514 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
515 ///
516 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
517 /// order.
518 /// \param[in] auxNTuple The name and location of the RNTuple to join the primary RNTuple with. The order in which
519 /// its entries are processed is determined by the primary RNTuple and doesn't necessarily have to be sequential.
520 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
521 /// The join is made based on the combined join field values, and therefore each field has to be present in each
522 /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuples are fully aligned.
523 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary RNTuple is used.
524 ///
525 /// \return A pointer to the newly created RNTupleProcessor.
526 static std::unique_ptr<RNTupleProcessor> CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple,
527 const std::vector<std::string> &joinFields,
528 std::string_view processorName = "");
529
530 /////////////////////////////////////////////////////////////////////////////
531 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
532 ///
533 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
534 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
535 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
536 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
537 /// The join is made based on the combined join field values, and therefore each field has to be present in each
538 /// specified processor. If an empty list is provided, it is assumed that the specified processors are fully
539 /// aligned.
540 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary processor is used.
541 ///
542 /// \return A pointer to the newly created RNTupleProcessor.
543 static std::unique_ptr<RNTupleProcessor>
544 CreateJoin(std::unique_ptr<RNTupleProcessor> primaryProcessor, std::unique_ptr<RNTupleProcessor> auxProcessor,
545 const std::vector<std::string> &joinFields, std::string_view processorName = "");
546};
547
548// clang-format off
549/**
550\class ROOT::Experimental::RNTupleSingleProcessor
551\ingroup NTuple
552\brief Processor specialization for processing a single RNTuple.
553*/
554// clang-format on
556 friend class RNTupleProcessor;
557
558private:
560 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
561
562 /////////////////////////////////////////////////////////////////////////////
563 /// \brief Create a new field and connect it to the processor's page source.
564 ///
565 /// \param[in] qualifiedFieldName Name of the field to add, prefixed with its parent fields, if applicable.
566 /// \param[in] typeName Type of the field to add.
567 ///
568 /// \return The newly created field.
569 /// \throws ROOT::RException In case the requested field cannot be found on disk.
570 std::unique_ptr<ROOT::RFieldBase>
571 CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName);
572
573 /////////////////////////////////////////////////////////////////////////////
574 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
575 ///
576 /// At this point, the page source for the underlying RNTuple of the processor will be created and opened.
577 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
578
579 /////////////////////////////////////////////////////////////////////////////
580 /// \brief Connect the provided field indices in the entry to their on-disk fields.
581 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
583 bool updateFields = false) final;
584
585 /////////////////////////////////////////////////////////////////////////////
586 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
587 /// processor).
588 ///
589 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
590 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
591
592 /////////////////////////////////////////////////////////////////////////////
593 /// \brief Get the total number of entries in this processor.
595 {
596 Initialize();
599 return fNEntries;
600 }
601
602 /////////////////////////////////////////////////////////////////////////////
603 /// \brief Check if a field exists on-disk and can be read by the processor.
604 ///
605 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
606 bool CanReadFieldFromDisk(std::string_view fieldName) final;
607
608 /////////////////////////////////////////////////////////////////////////////
609 /// \brief Add a field to the entry.
610 ///
611 /// \sa RNTupleProcessor::AddFieldToEntry()
613 const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
615
616 /////////////////////////////////////////////////////////////////////////////
617 /// \brief Add the entry mappings for this processor to the provided join table.
618 ///
619 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
620 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
621
622 /////////////////////////////////////////////////////////////////////////////
623 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
624 ///
625 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
626 void PrintStructureImpl(std::ostream &output) const final;
627
628 /////////////////////////////////////////////////////////////////////////////
629 /// \brief Construct a new RNTupleProcessor for processing a single RNTuple.
630 ///
631 /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
632 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
633 /// the name of the underlying RNTuple.
634 RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName);
635
636public:
639 RNTupleSingleProcessor &operator=(const RNTupleSingleProcessor &) = delete;
642 {
643 // The entry's fields need to be deleted before fPageSource.
644 if (fEntry)
645 fEntry->Clear();
646 };
647};
648
649// clang-format off
650/**
651\class ROOT::Experimental::RNTupleChainProcessor
652\ingroup NTuple
653\brief Processor specialization for vertically combined (*chained*) RNTupleProcessors.
654*/
655// clang-format on
657 friend class RNTupleProcessor;
658
659private:
660 std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors;
661 std::vector<ROOT::NTupleSize_t> fInnerNEntries;
662
664
665 /////////////////////////////////////////////////////////////////////////////
666 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
667 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
668
669 /////////////////////////////////////////////////////////////////////////////
670 /// \brief Connect the provided field indices in the entry to their on-disk fields.
671 ///
672 /// \sa RNTupleProcessor::Connect()
673 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
675 bool updateFields = false) final;
676
677 /////////////////////////////////////////////////////////////////////////////
678 /// \brief Update the entry to reflect any missing fields in the current inner processor.
679 void ConnectInnerProcessor(std::size_t processorNumber);
680
681 /////////////////////////////////////////////////////////////////////////////
682 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
683 /// processor).
684 ///
685 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
686 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
687
688 /////////////////////////////////////////////////////////////////////////////
689 /// \brief Get the total number of entries in this processor.
690 ///
691 /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly!
693
694 /////////////////////////////////////////////////////////////////////////////
695 /// \brief Check if a field exists on-disk and can be read by the processor.
696 ///
/// The check is delegated to the inner processor at index `fCurrentProcessorNumber`, so the result reflects that
/// inner processor only, not the chain as a whole.
///
697 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
698 bool CanReadFieldFromDisk(std::string_view fieldName) final
699 {
// Unchecked indexing: relies on fCurrentProcessorNumber being a valid index into fInnerProcessors.
700 return fInnerProcessors[fCurrentProcessorNumber]->CanReadFieldFromDisk(fieldName);
701 }
702
703 /////////////////////////////////////////////////////////////////////////////
704 /// \brief Add a field to the entry.
705 ///
706 /// \sa RNTupleProcessor::AddFieldToEntry()
708 const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
710
711 /////////////////////////////////////////////////////////////////////////////
712 /// \brief Add the entry mappings for this processor to the provided join table.
713 ///
714 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
715 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
716
717 /////////////////////////////////////////////////////////////////////////////
718 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
719 ///
720 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
721 void PrintStructureImpl(std::ostream &output) const final;
722
723 /////////////////////////////////////////////////////////////////////////////
724 /// \brief Construct a new RNTupleChainProcessor.
725 ///
726 /// \param[in] processors The inner processors to chain.
727 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
728 /// is the name of the first inner processor.
729 ///
730 /// The inner processors are processed in the order in which they are specified.
731 RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName);
732
733public:
736 RNTupleChainProcessor &operator=(const RNTupleChainProcessor &) = delete;
738 ~RNTupleChainProcessor() override = default;
739};
740
741// clang-format off
742/**
743\class ROOT::Experimental::RNTupleJoinProcessor
744\ingroup NTuple
745\brief Processor specialization for horizontally combined (*joined*) RNTupleProcessors.
746*/
747// clang-format on
749 friend class RNTupleProcessor;
750
751private:
752 std::unique_ptr<RNTupleProcessor> fPrimaryProcessor; ///< Primary side of the join; asked first in CanReadFieldFromDisk()
753 std::unique_ptr<RNTupleProcessor> fAuxiliaryProcessor; ///< Auxiliary side of the join; asked when the primary cannot read a field
754
755 std::vector<std::string> fJoinFieldNames; ///< Join field names (presumably the constructor's `joinFields` -- confirm in the implementation)
756 std::set<Internal::RNTupleProcessorEntry::FieldIndex_t> fJoinFieldIdxs; ///< Entry field indices, presumably of the join fields -- confirm
757
758 std::unique_ptr<Internal::RNTupleJoinTable> fJoinTable; ///< Join table owned by this processor
759 bool fJoinTableIsBuilt = false; ///< Starts out `false`; presumably set once fJoinTable has been built -- confirm
760
761 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fAuxiliaryFieldIdxs; ///< Entry field indices, presumably of auxiliary fields -- confirm
762
763 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
764 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
765
766 /////////////////////////////////////////////////////////////////////////////
767 /// \brief Connect the provided field indices in the entry to their on-disk fields.
768 ///
769 /// \sa RNTupleProcessor::Connect()
770 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
772 bool updateFields = false) final;
773
774 /////////////////////////////////////////////////////////////////////////////
775 /// \brief Load the entry identified by the provided entry number of the primary processor.
776 ///
777 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
778 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
779
780 /////////////////////////////////////////////////////////////////////////////
781 /// \brief Get the total number of entries in this processor.
783
784 /////////////////////////////////////////////////////////////////////////////
785 /// \brief Set the validity for all fields in the auxiliary processor at once.
786 void SetAuxiliaryFieldValidity(bool validity);
787
788 /////////////////////////////////////////////////////////////////////////////
789 /// \brief Check if a field exists on-disk and can be read by the processor.
790 ///
791 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
792 bool CanReadFieldFromDisk(std::string_view fieldName) final
793 {
794 if (!fPrimaryProcessor->CanReadFieldFromDisk(fieldName)) {
795 if (fieldName.find(fAuxiliaryProcessor->GetProcessorName()) == 0)
796 fieldName = fieldName.substr(fAuxiliaryProcessor->GetProcessorName().size() + 1);
797 return fAuxiliaryProcessor->CanReadFieldFromDisk(fieldName);
798 }
799
800 return true;
801 }
802
803 /////////////////////////////////////////////////////////////////////////////
804 /// \brief Add a field to the entry.
805 ///
806 /// \sa RNTupleProcessor::AddFieldToEntry()
808 const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
810
811 /////////////////////////////////////////////////////////////////////////////
812 /// \brief Add the entry mappings for this processor to the provided join table.
813 ///
814 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
815 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
816
817 /////////////////////////////////////////////////////////////////////////////
818 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
819 ///
820 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
821 void PrintStructureImpl(std::ostream &output) const final;
822
823 /////////////////////////////////////////////////////////////////////////////
824 /// \brief Construct a new RNTupleJoinProcessor.
825 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
826 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
827 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
828 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
829 /// The join is made based on the combined join field values, and therefore each field has to be present in each
830 /// specified processor. If an empty list is provided, it is assumed that the processors are fully aligned.
831 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this
832 /// is the name of the primary processor.
833 RNTupleJoinProcessor(std::unique_ptr<RNTupleProcessor> primaryProcessor,
834 std::unique_ptr<RNTupleProcessor> auxProcessor, const std::vector<std::string> &joinFields,
835 std::string_view processorName);
836
837public:
839 RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &) = delete;
842 ~RNTupleJoinProcessor() override = default;
843};
844
845} // namespace Experimental
846} // namespace ROOT
847
848#endif // ROOT_RNTupleProcessor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:300
if(isa< VarDecl >(D)||isa< FieldDecl >(D)||isa< EnumConstantDecl >(D))
Definition TCling.cxx:7039
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
Collection of values in an RNTupleProcessor, analogous to REntry, with checks and support for missing...
Processor specialization for vertically combined (chained) RNTupleProcessors.
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
RNTupleChainProcessor(std::vector< std::unique_ptr< RNTupleProcessor > > processors, std::string_view processorName)
Construct a new RNTupleChainProcessor.
void ConnectInnerProcessor(std::size_t processorNumber)
Update the entry to reflect any missing fields in the current inner processor.
Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
Internal::RNTupleProcessorProvenance fProvenance
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor by creating an (initially empty) fEntry, or setting an existing one.
std::vector< ROOT::NTupleSize_t > fInnerNEntries
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided fields indices in the entry to their on-disk fields.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
std::vector< std::unique_ptr< RNTupleProcessor > > fInnerProcessors
Processor specialization for horizontally combined (joined) RNTupleProcessors.
std::set< Internal::RNTupleProcessorEntry::FieldIndex_t > fJoinFieldIdxs
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fAuxiliaryFieldIdxs
RNTupleJoinProcessor(std::unique_ptr< RNTupleProcessor > primaryProcessor, std::unique_ptr< RNTupleProcessor > auxProcessor, const std::vector< std::string > &joinFields, std::string_view processorName)
Construct a new RNTupleJoinProcessor.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided entry number of the primary processor.
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
void SetAuxiliaryFieldValidity(bool validity)
Set the validity for all fields in the auxiliary processor at once.
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided fields indices in the entry to their on-disk fields.
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
std::unique_ptr< RNTupleProcessor > fPrimaryProcessor
std::unique_ptr< Internal::RNTupleJoinTable > fJoinTable
std::unique_ptr< RNTupleProcessor > fAuxiliaryProcessor
Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
RNTupleOpenSpec(std::string_view n, const std::string &s)
std::variant< std::string, TDirectory * > fStorage
RNTupleOpenSpec(std::string_view n, TDirectory *s)
std::unique_ptr< ROOT::Internal::RPageSource > CreatePageSource() const
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
void BindRawPtr(void *valuePtr)
Bind the value to valuePtr.
void * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
std::shared_ptr< void > GetPtr() const
Get the pointer to the field value managed by the processor's entry.
bool HasValue() const
Check if the pointer currently holds a valid value.
std::shared_ptr< T > GetPtr() const
Get a shared pointer to the field value managed by the processor's entry.
const T & operator*() const
Get a reference to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
const T * operator->() const
Access the field value managed by the processor's entry.
void BindRawPtr(T *valuePtr)
Bind the value to valuePtr.
bool HasValue() const
Check if the pointer currently holds a valid value.
T * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry * fProcessorEntry
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
Identifies how a processor is composed.
Iterator over the entries of an RNTuple, or vertical concatenation thereof.
friend bool operator==(const iterator &lh, const iterator &rh)
friend bool operator!=(const iterator &lh, const iterator &rh)
RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber)
Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combine...
virtual bool CanReadFieldFromDisk(std::string_view fieldName)=0
Check if a field exists on-disk and can be read by the processor.
const std::string & GetProcessorName() const
Get the name of the processor.
RNTupleProcessorOptionalPtr< T > RequestField(const std::string &fieldName, void *valuePtr=nullptr)
Request access to a field for reading during processing.
virtual ROOT::NTupleSize_t GetNEntries()=0
Get the total number of entries in this processor.
static std::unique_ptr< RNTupleProcessor > CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, const std::vector< std::string > &joinFields, std::string_view processorName="")
Create an RNTupleProcessor for a join (i.e., a horizontal combination) of RNTuples.
ROOT::NTupleSize_t fNEntries
Total number of entries.
friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader
static std::unique_ptr< RNTupleProcessor > CreateChain(std::vector< RNTupleOpenSpec > ntuples, std::string_view processorName="")
Create an RNTupleProcessor for a chain (i.e., a vertical combination) of RNTuples.
RNTupleProcessor(RNTupleProcessor &&)=delete
std::shared_ptr< Internal::RNTupleProcessorEntry > fEntry
virtual void PrintStructureImpl(std::ostream &output) const =0
Processor-specific implementation for printing its structure, called by PrintStructure().
virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber)=0
Load the entry identified by the provided entry number.
ROOT::NTupleSize_t GetCurrentEntryNumber() const
Get the entry number that is currently being processed.
virtual void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance, bool updateFields)=0
Connect fields to the page source of the processor's underlying RNTuple(s).
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fFieldIdxs
virtual void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry)=0
Initialize the processor by creating an (initially empty) fEntry, or setting an existing one.
bool IsInitialized() const
Check if the processor already has been initialized.
virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0)=0
Add the entry mappings for this processor to the provided join table.
std::size_t GetCurrentProcessorNumber() const
Get the number of the inner processor currently being read.
virtual Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr, const Internal::RNTupleProcessorProvenance &provenance)=0
Add a field to the entry.
void PrintStructure(std::ostream &output=std::cout)
Print a graphical representation of the processor composition.
ROOT::NTupleSize_t GetNEntriesProcessed() const
Get the total number of entries processed so far.
RNTupleProcessor(const RNTupleProcessor &)=delete
RNTupleProcessor & operator=(RNTupleProcessor &&)=delete
RNTupleProcessor(std::string_view processorName)
Create a new base RNTupleProcessor.
static std::unique_ptr< RNTupleProcessor > Create(RNTupleOpenSpec ntuple, std::string_view processorName="")
Create an RNTupleProcessor for a single RNTuple.
RNTupleProcessor & operator=(const RNTupleProcessor &)=delete
Processor specialization for processing a single RNTuple.
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided fields indices in the entry to their on-disk fields.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor by creating an (initially empty) fEntry, or setting an existing one.
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName)
Construct a new RNTupleProcessor for processing a single RNTuple.
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
std::unique_ptr< ROOT::RFieldBase > CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName)
Create a new field and connect it to the processor's page source.
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Describe directory structure in memory.
Definition TDirectory.h:45
STL class.
STL class.
STL class.
STL class.
const Int_t n
Definition legend1.C:16
Namespace for ROOT features in testing.
Definition TROOT.h:100
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert std::prefix.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.