Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorage.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorage.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorage
17#define ROOT7_RPageStorage
18
19#include <ROOT/RCluster.hxx>
24#include <ROOT/RNTupleUtil.hxx>
25#include <ROOT/RPage.hxx>
27#include <ROOT/RSpan.hxx>
28#include <string_view>
29
30#include <atomic>
31#include <cstddef>
32#include <deque>
33#include <functional>
34#include <memory>
35#include <mutex>
36#include <shared_mutex>
37#include <unordered_set>
38#include <vector>
39
40namespace ROOT {
41namespace Experimental {
42
43class RFieldBase;
44class RNTupleModel;
45
46namespace Internal {
47class RColumn;
48class RColumnElementBase;
49class RNTupleCompressor;
50class RNTupleDecompressor;
51struct RNTupleModelChangeset;
52
53enum class EPageStorageType { // role of a concrete page storage, as reported by GetType()
54 kSink, ///< the storage is written to (cf. RPageSink)
55 kSource, ///< the storage is read from (cf. RPageSource)
56};
57
58// clang-format off
59/**
60\class ROOT::Experimental::Internal::RPageStorage
61\ingroup NTuple
62\brief Common functionality of an ntuple storage for both reading and writing
63
64The RPageStorage provides access to a storage container that keeps the bits of pages and clusters comprising
65an ntuple. Concrete implementations can use a TFile, a raw file, an object store, and so on.
66*/
67// clang-format on
69public:
70 /// The interface of a task scheduler to schedule page (de)compression tasks
72 public:
73 virtual ~RTaskScheduler() = default;
74 /// Take a callable that represents a task
75 virtual void AddTask(const std::function<void(void)> &taskFunc) = 0;
76 /// Blocks until all scheduled tasks finished
77 virtual void Wait() = 0;
78 };
79
80 /// A sealed page contains the bytes of a page as written to storage (packed & compressed). It is used
81 /// as an input to UnsealPages() as well as to transfer pages between different storage media.
82 /// RSealedPage does _not_ own the buffer it is pointing to in order to not interfere with the memory management
83 /// of concrete page sink and page source implementations.
84 struct RSealedPage {
85 const void *fBuffer = nullptr; ///< Non-owning pointer to the sealed bytes; RSealedPage never frees it
86 std::uint32_t fSize = 0; ///< presumably the size of the sealed buffer in bytes -- TODO confirm
87 std::uint32_t fNElements = 0; ///< presumably the number of column elements stored in the page -- TODO confirm
88
89 RSealedPage() = default;
90 RSealedPage(const void *b, std::uint32_t s, std::uint32_t n) : fBuffer(b), fSize(s), fNElements(n) {}
91 RSealedPage(const RSealedPage &other) = delete; // copying is disabled; a sealed page can only be moved
92 RSealedPage& operator =(const RSealedPage &other) = delete;
93 RSealedPage(RSealedPage &&other) = default; // defaulted move: copies the pointer and sizes, the buffer itself is untouched
94 RSealedPage& operator =(RSealedPage &&other) = default;
95 };
96
97 using SealedPageSequence_t = std::deque<RSealedPage>;
98 /// A range of sealed pages referring to the same column that can be used for vector commit
101 SealedPageSequence_t::const_iterator fFirst;
102 SealedPageSequence_t::const_iterator fLast;
103
104 RSealedPageGroup(DescriptorId_t d, SealedPageSequence_t::const_iterator b, SealedPageSequence_t::const_iterator e)
106 {
107 }
108 };
109
110protected:
112
113 std::string fNTupleName;
116 {
117 if (!fTaskScheduler)
118 return;
120 }
121
122public:
123 explicit RPageStorage(std::string_view name);
124 RPageStorage(const RPageStorage &other) = delete;
125 RPageStorage& operator =(const RPageStorage &other) = delete;
126 RPageStorage(RPageStorage &&other) = default;
128 virtual ~RPageStorage();
129
130 /// Whether the concrete implementation is a sink or a source
132
135 const RColumn *fColumn = nullptr;
136
137 /// Returns true for a valid column handle; fColumn and fPhysicalId should always either both
138 /// be valid or both be invalid.
139 explicit operator bool() const { return fPhysicalId != kInvalidDescriptorId && fColumn; }
140 };
141 /// The column handle identifies a column with the current open page storage
143
144 /// Register a new column. When reading, the column must exist in the ntuple on disk corresponding to the meta-data.
145 /// When writing, every column can only be attached once.
146 virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) = 0;
147 /// Unregisters a column. A page source decreases the reference counter for the corresponding active column.
148 /// For a page sink, dropping columns is currently a no-op.
149 virtual void DropColumn(ColumnHandle_t columnHandle) = 0;
150
151 /// Every page store needs to be able to free pages it handed out. But sinks and sources have different means
152 /// of allocating pages.
153 virtual void ReleasePage(RPage &page) = 0;
154
155 /// Returns the default metrics object. Subclasses might alternatively provide their own metrics object by
156 /// overriding this.
158
159 /// Returns the NTuple name.
160 const std::string &GetNTupleName() const { return fNTupleName; }
161
162 void SetTaskScheduler(RTaskScheduler *taskScheduler) { fTaskScheduler = taskScheduler; }
163}; // class RPageStorage
164
165// clang-format off
166/**
167\class ROOT::Experimental::Internal::RPageSink
168\ingroup NTuple
169\brief Abstract interface to write data into an ntuple
170
171The page sink takes the list of columns and afterwards a series of page commits and cluster commits.
172The user is responsible to commit clusters at a consistent point, i.e. when all pages corresponding to data
173up to the given entry number are committed.
174
175An object of this class may either be a wrapper (for example a RPageSinkBuf) or a "persistent" sink,
176inheriting from RPagePersistentSink.
177*/
178// clang-format on
179class RPageSink : public RPageStorage {
180protected:
181 std::unique_ptr<RNTupleWriteOptions> fOptions;
182
183 /// Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
184 /// There could be concrete page sinks that don't need a compressor. Therefore, and in order to stay consistent
185 /// with the page source, we leave it up to the derived class whether or not the compressor gets constructed.
186 std::unique_ptr<RNTupleCompressor> fCompressor;
187
188 /// Helper for streaming a page. This is commonly used in derived, concrete page sinks. Note that if
189 /// compressionSetting is 0 (uncompressed) and the page is mappable, the returned sealed page will
190 /// point directly to the input page buffer. Otherwise, the sealed page references an internal buffer
191 /// of fCompressor. Thus, the buffer pointed to by the RSealedPage should never be freed.
192 /// Usage of this method requires construction of fCompressor.
193 RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting);
194
195 /// Seal a page using the provided buffer.
196 static RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting, void *buf,
197 bool allowAlias = true);
198
199private:
200 /// Flag if sink was initialized
201 bool fIsInitialized = false;
202
203public:
204 RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options);
205
206 RPageSink(const RPageSink&) = delete;
207 RPageSink& operator=(const RPageSink&) = delete;
208 RPageSink(RPageSink&&) = default;
210 ~RPageSink() override;
211
213 /// Returns the sink's write options.
214 const RNTupleWriteOptions &GetWriteOptions() const { return *fOptions; }
215
216 void DropColumn(ColumnHandle_t /*columnHandle*/) final {}
217
218 bool IsInitialized() const { return fIsInitialized; }
219
220 /// Return the RNTupleDescriptor being constructed.
221 virtual const RNTupleDescriptor &GetDescriptor() const = 0;
222
223 /// Physically creates the storage container to hold the ntuple (e.g., a key in a TFile or an S3 bucket).
224 /// Init() associates column handles to the columns referenced by the model.
225 void Init(RNTupleModel &model)
226 {
227 if (fIsInitialized) { // a sink can be initialized only once; a second call is reported as an error
228 throw RException(R__FAIL("already initialized"));
229 }
230 fIsInitialized = true; // set before delegating, i.e. the flag stays set even if InitImpl() throws
231 InitImpl(model); // the derived class does the actual initialization work
232 }
233
234protected:
235 virtual void InitImpl(RNTupleModel &model) = 0;
236
237public:
238 /// Incorporate incremental changes to the model into the ntuple descriptor. This happens, e.g. if new fields were
239 /// added after the initial call to `RPageSink::Init(RNTupleModel &)`.
240 /// `firstEntry` specifies the global index for the first stored element in the added columns.
241 virtual void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) = 0;
242
243 /// Write a page to the storage. The column must have been added before.
244 virtual void CommitPage(ColumnHandle_t columnHandle, const RPage &page) = 0;
245 /// Write a preprocessed page to storage. The column must have been added before.
246 virtual void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) = 0;
247 /// Write a vector of preprocessed pages to storage. The corresponding columns must have been added before.
248 virtual void CommitSealedPageV(std::span<RPageStorage::RSealedPageGroup> ranges) = 0;
249 /// Finalize the current cluster and create a new one for the following data.
250 /// Returns the number of bytes written to storage (excluding meta-data).
251 virtual std::uint64_t CommitCluster(NTupleSize_t nNewEntries) = 0;
252 /// Write out the page locations (page list envelope) for all the committed clusters since the last call of
253 /// CommitClusterGroup (or the beginning of writing).
254 virtual void CommitClusterGroup() = 0;
255 /// Finalize the current cluster and the entire data set.
256 virtual void CommitDataset() = 0;
257
258 /// Get a new, empty page for the given column that can be filled with up to nElements. If nElements is zero,
259 /// the page sink picks an appropriate size.
260 virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) = 0;
261
262 /// An RAII wrapper used to synchronize a page sink. See GetSinkGuard().
264 std::mutex *fLock;
265
266 public:
267 explicit RSinkGuard(std::mutex *lock) : fLock(lock) // `lock` may be nullptr; the mutex is not owned by the guard
268 {
269 if (fLock != nullptr) { // a null lock means that no synchronization is requested
270 fLock->lock(); // blocks until the mutex is acquired; released again in the destructor
271 }
272 }
273 RSinkGuard(const RSinkGuard &) = delete;
274 RSinkGuard &operator=(const RSinkGuard &) = delete;
275 RSinkGuard(RSinkGuard &&) = delete;
278 {
279 if (fLock != nullptr) {
280 fLock->unlock();
281 }
282 }
283 };
284
286 {
287 // By default, there is no lock and the guard does nothing.
288 return RSinkGuard(nullptr);
289 }
290}; // class RPageSink
291
292// clang-format off
293/**
294\class ROOT::Experimental::Internal::RPagePersistentSink
295\ingroup NTuple
296\brief Base class for a sink with a physical storage backend
297*/
298// clang-format on
300private:
301 /// Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization
303
304 /// Remembers the starting cluster id for the next cluster group
305 std::uint64_t fNextClusterInGroup = 0;
306 /// Used to calculate the number of entries in the current cluster
308 /// Keeps track of the number of elements in the currently open cluster. Indexed by column id.
309 std::vector<RClusterDescriptor::RColumnRange> fOpenColumnRanges;
310 /// Keeps track of the written pages in the currently open cluster. Indexed by column id.
311 std::vector<RClusterDescriptor::RPageRange> fOpenPageRanges;
312
313protected:
315
316 /// Default I/O performance counters that get registered in fMetrics
317 struct RCounters {
325 };
326 std::unique_ptr<RCounters> fCounters;
327
328 virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length) = 0;
329
330 virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) = 0;
331 virtual RNTupleLocator
332 CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) = 0;
333 /// Vector commit of preprocessed pages. The `ranges` array specifies a range of sealed pages to be
334 /// committed for each column. The returned vector contains, in order, the RNTupleLocator for each
335 /// page on each range in `ranges`, i.e. the first N entries refer to the N pages in `ranges[0]`,
336 /// followed by M entries that refer to the M pages in `ranges[1]`, etc.
337 /// The default is to call `CommitSealedPageImpl` for each page; derived classes may provide an
338 /// optimized implementation though.
339 virtual std::vector<RNTupleLocator> CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges);
340 /// Returns the number of bytes written to storage (excluding metadata)
341 virtual std::uint64_t CommitClusterImpl() = 0;
342 /// Returns the locator of the page list envelope of the given buffer that contains the serialized page list.
343 /// Typically, the implementation takes care of compressing and writing the provided buffer.
344 virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) = 0;
345 virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) = 0;
346
347 /// Enables the default set of metrics provided by RPageSink. `prefix` will be used as the prefix for
348 /// the counters registered in the internal RNTupleMetrics object.
349 /// This set of counters can be extended by a subclass by calling `fMetrics.MakeCounter<...>()`.
350 ///
351 /// A subclass using the default set of metrics is always responsible for updating the counters
352 /// appropriately, e.g. `fCounters->fNPageCommited.Inc()`
353 void EnableDefaultMetrics(const std::string &prefix);
354
355public:
356 RPagePersistentSink(std::string_view ntupleName, const RNTupleWriteOptions &options);
357
362 ~RPagePersistentSink() override;
363
364 /// Guess the concrete derived page sink from the location
365 static std::unique_ptr<RPageSink> Create(std::string_view ntupleName, std::string_view location,
366 const RNTupleWriteOptions &options = RNTupleWriteOptions());
367
368 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
369
371
372 /// Updates the descriptor and calls InitImpl() that handles the backend-specific details (file, DAOS, etc.)
373 void InitImpl(RNTupleModel &model) final;
374 void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final;
375
376 /// Initialize sink based on an existing descriptor and fill into the descriptor builder.
377 void InitFromDescriptor(const RNTupleDescriptor &descriptor);
378
379 void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final;
380 void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
381 void CommitSealedPageV(std::span<RPageStorage::RSealedPageGroup> ranges) final;
382 std::uint64_t CommitCluster(NTupleSize_t nEntries) final;
383 void CommitClusterGroup() final;
384 void CommitDataset() final;
385}; // class RPagePersistentSink
386
387// clang-format off
388/**
389\class ROOT::Experimental::Internal::RPageSource
390\ingroup NTuple
391\brief Abstract interface to read data from an ntuple
392
393The page source is initialized with the columns of interest. Alias columns from projected fields are mapped to the
394corresponding physical columns. Pages from the columns of interest can then be mapped into memory.
395The page source also gives access to the ntuple's meta-data.
396*/
397// clang-format on
398class RPageSource : public RPageStorage {
399public:
400 /// Used in SetEntryRange / GetEntryRange
401 struct REntryRange {
403 NTupleSize_t fNEntries = 0;
404
405 /// Returns true if the given cluster has entries within the entry range
406 bool IntersectsWith(const RClusterDescriptor &clusterDesc) const;
407 };
408
409 /// An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetSharedDescriptorGuard().
412 std::shared_mutex &fLock;
413
414 public:
415 RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock) : fDescriptor(desc), fLock(lock)
416 {
417 fLock.lock_shared();
418 }
423 ~RSharedDescriptorGuard() { fLock.unlock_shared(); }
424 const RNTupleDescriptor *operator->() const { return &fDescriptor; }
425 const RNTupleDescriptor &GetRef() const { return fDescriptor; }
426 };
427
428 /// An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetExclDescriptorGuard().
431 std::shared_mutex &fLock;
432
433 public:
434 RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock) : fDescriptor(desc), fLock(lock)
435 {
436 fLock.lock();
437 }
443 {
444 fDescriptor.IncGeneration();
445 fLock.unlock();
446 }
447 RNTupleDescriptor *operator->() const { return &fDescriptor; }
448 void MoveIn(RNTupleDescriptor &&desc) { fDescriptor = std::move(desc); }
449 };
450
451private:
453 mutable std::shared_mutex fDescriptorLock;
454 REntryRange fEntryRange; ///< Used by the cluster pool to prevent reading beyond the given range
455
456protected:
457 /// Default I/O performance counters that get registered in fMetrics
458 struct RCounters {
476 };
477
478 /// Keeps track of the requested physical column IDs. When using alias columns (projected fields), physical
479 /// columns may be requested multiple times.
481 private:
482 std::vector<DescriptorId_t> fIDs;
483 std::vector<std::size_t> fRefCounters;
484
485 public:
486 void Insert(DescriptorId_t physicalColumnID);
487 void Erase(DescriptorId_t physicalColumnID);
488 RCluster::ColumnSet_t ToColumnSet() const;
489 };
490
491 std::unique_ptr<RCounters> fCounters;
492
494 /// The active columns are implicitly defined by the model fields or views
496
497 /// Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
498 /// Not all page sources need a decompressor (e.g. virtual ones for chains and friends don't), thus we
499 /// leave it up to the derived class whether or not the decompressor gets constructed.
500 std::unique_ptr<RNTupleDecompressor> fDecompressor;
501
503 // Only called if a task scheduler is set. No-op by default.
504 virtual void UnzipClusterImpl(RCluster * /* cluster */)
505 { }
506
507 /// Prepare a page range read for the column set in `clusterKey`. Specifically, pages referencing the
508 /// `kTypePageZero` locator are filled in `pageZeroMap`; otherwise, `perPageFunc` is called for each page. This is
509 /// commonly used as part of `LoadClusters()` in derived classes.
510 void PrepareLoadCluster(
511 const RCluster::RKey &clusterKey, ROnDiskPageMap &pageZeroMap,
512 std::function<void(DescriptorId_t, NTupleSize_t, const RClusterDescriptor::RPageRange::RPageInfo &)> perPageFunc);
513
514 /// Enables the default set of metrics provided by RPageSource. `prefix` will be used as the prefix for
515 /// the counters registered in the internal RNTupleMetrics object.
516 /// A subclass using the default set of metrics is responsible for updating the counters
517 /// appropriately, e.g. `fCounters->fNRead.Inc()`
518 /// Alternatively, a subclass might provide its own RNTupleMetrics object by overriding the
519 /// GetMetrics() member function.
520 void EnableDefaultMetrics(const std::string &prefix);
521
522 /// Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further information.
523 RExclDescriptorGuard GetExclDescriptorGuard() { return RExclDescriptorGuard(fDescriptor, fDescriptorLock); }
524
525public:
526 RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions);
527 RPageSource(const RPageSource&) = delete;
531 ~RPageSource() override;
532 /// Guess the concrete derived page source from the file name (location)
533 static std::unique_ptr<RPageSource> Create(std::string_view ntupleName, std::string_view location,
534 const RNTupleReadOptions &options = RNTupleReadOptions());
535 /// Open the same storage multiple times, e.g. for reading in multiple threads
536 virtual std::unique_ptr<RPageSource> Clone() const = 0;
537
539 const RNTupleReadOptions &GetReadOptions() const { return fOptions; }
540
541 /// Takes the read lock for the descriptor. Multiple threads can take the lock concurrently.
542 /// The underlying std::shared_mutex, however, is neither read nor write recursive:
543 /// within one thread, only one lock (shared or exclusive) must be acquired at the same time. This requires special
544 /// care in sections protected by GetSharedDescriptorGuard() and GetExclDescriptorGuard() especially to avoid that
545 /// the locks are acquired indirectly (e.g. by a call to GetNEntries()).
546 /// As a general guideline, no other method of the page source should be called (directly or indirectly) in a
547 /// guarded section.
549 {
550 return RSharedDescriptorGuard(fDescriptor, fDescriptorLock);
551 }
552
553 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override;
554 void DropColumn(ColumnHandle_t columnHandle) override;
555
556 /// Open the physical storage container for the tree
557 void Attach() { GetExclDescriptorGuard().MoveIn(AttachImpl()); } // takes the exclusive descriptor lock for this statement and moves the descriptor returned by AttachImpl() into fDescriptor
558 NTupleSize_t GetNEntries();
559 NTupleSize_t GetNElements(ColumnHandle_t columnHandle);
560 ColumnId_t GetColumnId(ColumnHandle_t columnHandle);
561
562 /// Promise to only read from the given entry range. If set, prevents the cluster pool from reading-ahead beyond
563 /// the given range. The range needs to be within [0, GetNEntries()).
564 void SetEntryRange(const REntryRange &range);
565 REntryRange GetEntryRange() const { return fEntryRange; }
566
567 /// Allocates and fills a page that contains the index-th element
568 virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) = 0;
569 /// Another version of PopulatePage that allows to specify cluster-relative indexes
570 virtual RPage PopulatePage(ColumnHandle_t columnHandle, RClusterIndex clusterIndex) = 0;
571
572 /// Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage. The sealed page
573 /// can be used subsequently in a call to RPageSink::CommitSealedPage.
574 /// The fSize and fNElements members of the sealedPage parameter are always set. If sealedPage.fBuffer is nullptr,
575 /// no data will be copied but the returned size information can be used by the caller to allocate a large enough
576 /// buffer and call LoadSealedPage again.
577 virtual void
578 LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage) = 0;
579
580 /// Helper for unstreaming a page. This is commonly used in derived, concrete page sources. The implementation
581 /// currently always makes a memory copy, even if the sealed page is uncompressed and in the final memory layout.
582 /// The optimization of directly mapping pages is left to the concrete page source implementations.
583 /// Usage of this method requires construction of fDecompressor. Memory is allocated via
584 /// `RPageAllocatorHeap`; use `RPageAllocatorHeap::DeletePage()` to deallocate returned pages.
585 RPage UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element, DescriptorId_t physicalColumnId);
586
587 /// Populates all the pages of the given cluster ids and columns; it is possible that some columns do not
588 /// contain any pages. The page source may load more columns than the minimal necessary set from `columns`.
589 /// To indicate which columns have been loaded, LoadClusters() must mark them with SetColumnAvailable().
590 /// That includes the ones from the `columns` that don't have pages; otherwise subsequent requests
591 /// for the cluster would assume an incomplete cluster and trigger loading again.
592 /// LoadClusters() is typically called from the I/O thread of a cluster pool, i.e. the method runs
593 /// concurrently to other methods of the page source.
594 virtual std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) = 0;
595
596 /// Parallel decompression and unpacking of the pages in the given cluster. The unzipped pages are supposed
597 /// to be preloaded in a page pool attached to the source. The method is triggered by the cluster pool's
598 /// unzip thread. It is an optional optimization, the method can safely do nothing. In particular, the
599 /// actual implementation will only run if a task scheduler is set. In practice, a task scheduler is set
600 /// if implicit multi-threading is turned on.
601 void UnzipCluster(RCluster *cluster);
602}; // class RPageSource
603
604} // namespace Internal
605
606} // namespace Experimental
607} // namespace ROOT
608
609#endif
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define e(i)
Definition RSha256.hxx:103
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
char name[80]
Definition TGX11.cxx:110
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
An either thread-safe or non thread safe counter for CPU ticks.
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:152
std::unordered_set< DescriptorId_t > ColumnSet_t
Definition RCluster.hxx:154
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RNTupleDescriptor.
The serialization context is used for the piecewise serialization of a descriptor.
A memory region that contains packed and compressed pages.
Definition RCluster.hxx:103
Base class for a sink with a physical storage backend.
RPagePersistentSink(const RPagePersistentSink &)=delete
RPagePersistentSink(RPagePersistentSink &&)=default
std::uint64_t fNextClusterInGroup
Remembers the starting cluster id for the next cluster group.
RPagePersistentSink & operator=(RPagePersistentSink &&)=default
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
RNTupleSerializer::RContext fSerializationContext
Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization.
virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
void InitFromDescriptor(const RNTupleDescriptor &descriptor)
Initialize sink based on an existing descriptor and fill into the descriptor builder.
virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length)=0
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
NTupleSize_t fPrevClusterNEntries
Used to calculate the number of entries in the current cluster.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
std::uint64_t CommitCluster(NTupleSize_t nEntries) final
Finalize the current cluster and create a new one for the following data.
const RNTupleDescriptor & GetDescriptor() const final
Return the RNTupleDescriptor being constructed.
void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final
Write a page to the storage. The column must have been added before.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length)=0
RPagePersistentSink & operator=(const RPagePersistentSink &)=delete
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the location.
void CommitDataset() final
Finalize the current cluster and the entire data set.
Internal::RNTupleDescriptorBuilder fDescriptorBuilder
void CommitClusterGroup() final
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
virtual std::uint64_t CommitClusterImpl()=0
Returns the number of bytes written to storage (excluding metadata)
void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
Write a preprocessed page to storage. The column must have been added before.
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges) final
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
virtual std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges)
Vector commit of preprocessed pages.
virtual RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)=0
An RAII wrapper used to synchronize a page sink. See GetSinkGuard().
RSinkGuard & operator=(const RSinkGuard &)=delete
RSinkGuard & operator=(RSinkGuard &&)=delete
Abstract interface to write data into an ntuple.
RPageSink & operator=(RPageSink &&)=default
bool fIsInitialized
Flag if sink was initialized.
virtual const RNTupleDescriptor & GetDescriptor() const =0
Return the RNTupleDescriptor being constructed.
void Init(RNTupleModel &model)
Physically creates the storage container to hold the ntuple (e.g., a key in a TFile or an S3 bucket) In...
virtual void CommitPage(ColumnHandle_t columnHandle, const RPage &page)=0
Write a page to the storage. The column must have been added before.
const RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
RPageSink & operator=(const RPageSink &)=delete
RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting)
Helper for streaming a page.
virtual void CommitClusterGroup()=0
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)=0
Get a new, empty page for the given column that can be filled with up to nElements.
virtual std::uint64_t CommitCluster(NTupleSize_t nNewEntries)=0
Finalize the current cluster and create a new one for the following data.
RPageSink(const RPageSink &)=delete
virtual void CommitDataset()=0
Finalize the current cluster and the entire data set.
virtual void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)=0
Write a preprocessed page to storage. The column must have been added before.
virtual void InitImpl(RNTupleModel &model)=0
void DropColumn(ColumnHandle_t) final
Unregisters a column.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::unique_ptr< RNTupleCompressor > fCompressor
Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
virtual void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry)=0
Incorporate incremental changes to the model into the ntuple descriptor.
virtual void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges)=0
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
std::unique_ptr< RNTupleWriteOptions > fOptions
Keeps track of the requested physical column IDs.
An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetSharedDescriptorGuar...
RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock)
RExclDescriptorGuard(const RExclDescriptorGuard &)=delete
RExclDescriptorGuard & operator=(RExclDescriptorGuard &&)=delete
RExclDescriptorGuard & operator=(const RExclDescriptorGuard &)=delete
An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetExclDescriptorGuard...
RSharedDescriptorGuard & operator=(RSharedDescriptorGuard &&)=delete
RSharedDescriptorGuard(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock)
RSharedDescriptorGuard & operator=(const RSharedDescriptorGuard &)=delete
Abstract interface to read data from an ntuple.
virtual void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage)=0
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
RPageSource(const RPageSource &)=delete
RPageSource & operator=(RPageSource &&)=delete
std::unique_ptr< RCounters > fCounters
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
RExclDescriptorGuard GetExclDescriptorGuard()
Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further informatio...
RActivePhysicalColumns fActivePhysicalColumns
The active columns are implicitly defined by the model fields or views.
RPageSource & operator=(const RPageSource &)=delete
virtual RNTupleDescriptor AttachImpl()=0
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
const RNTupleReadOptions & GetReadOptions() const
virtual std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys)=0
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
REntryRange fEntryRange
Used by the cluster pool to prevent reading beyond the given range.
std::unique_ptr< RNTupleDecompressor > fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, RClusterIndex clusterIndex)=0
Another version of PopulatePage that allows specifying cluster-relative indexes.
void Attach()
Open the physical storage container for the tree.
The interface of a task scheduler to schedule page (de)compression tasks.
virtual void Wait()=0
Blocks until all scheduled tasks have finished.
virtual void AddTask(const std::function< void(void)> &taskFunc)=0
Take a callable that represents a task.
Common functionality of an ntuple storage for both reading and writing.
std::deque< RSealedPage > SealedPageSequence_t
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
virtual Detail::RNTupleMetrics & GetMetrics()
Returns the default metrics object.
const std::string & GetNTupleName() const
Returns the NTuple name.
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
RPageStorage & operator=(const RPageStorage &other)=delete
RPageStorage(const RPageStorage &other)=delete
RPageStorage(RPageStorage &&other)=default
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
void SetTaskScheduler(RTaskScheduler *taskScheduler)
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
Meta-data for a set of ntuple clusters.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:156
The incremental changes to an RNTupleModel.
Default I/O performance counters that get registered in fMetrics.
Detail::RNTupleTickCounter< Detail::RNTupleAtomicCounter > & fTimeCpuZip
Detail::RNTupleTickCounter< Detail::RNTupleAtomicCounter > & fTimeCpuWrite
Default I/O performance counters that get registered in fMetrics.
Detail::RNTupleTickCounter< Detail::RNTupleAtomicCounter > & fTimeCpuUnzip
Detail::RNTupleTickCounter< Detail::RNTupleAtomicCounter > & fTimeCpuRead
A range of sealed pages referring to the same column that can be used for vector commit.
RSealedPageGroup(DescriptorId_t d, SealedPageSequence_t::const_iterator b, SealedPageSequence_t::const_iterator e)
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RSealedPage & operator=(const RSealedPage &other)=delete
RSealedPage(const void *b, std::uint32_t s, std::uint32_t n)
We do not need to store the element size / uncompressed page size because we know to which column the...
Generic information about the physical location of data.