16#ifndef ROOT7_RPageStorage
17#define ROOT7_RPageStorage
34#include <shared_mutex>
35#include <unordered_set>
39namespace Experimental {
47class RColumnElementBase;
48class RNTupleCompressor;
49class RNTupleDecompressor;
103 SealedPageSequence_t::const_iterator
fFirst;
104 SealedPageSequence_t::const_iterator
fLast;
215 virtual std::vector<RNTupleLocator>
CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges);
232 int compressionSetting,
void *buf);
376 std::vector<DescriptorId_t>
fIDs;
431 virtual std::unique_ptr<RPageSource>
Clone()
const = 0;
477 virtual std::vector<std::unique_ptr<RCluster>>
LoadClusters(std::span<RCluster::RKey> clusterKeys) = 0;
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t b
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
An in-memory subset of the packed and compressed pages of a cluster.
std::unordered_set< DescriptorId_t > ColumnSet_t
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
A counter for CPU ticks that is either thread-safe or non-thread-safe.
Abstract interface to write data into an ntuple.
virtual RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)=0
void CommitDataset()
Finalize the current cluster and the entire data set.
RPageSink(const RPageSink &)=delete
RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting)
Helper for streaming a page.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges)
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
RPageSink & operator=(RPageSink &&)=default
virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
virtual std::uint64_t CommitClusterImpl(NTupleSize_t nEntries)=0
Returns the number of bytes written to storage (excluding metadata)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length)=0
std::unique_ptr< RCounters > fCounters
RNTupleDescriptorBuilder fDescriptorBuilder
RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively provide their own metrics object b...
RPageSink & operator=(const RPageSink &)=delete
void DropColumn(ColumnHandle_t) final
Unregisters a column.
std::unique_ptr< RNTupleWriteOptions > fOptions
virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length)=0
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
RPageSink(RPageSink &&)=default
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
std::uint64_t CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
const RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
void CommitClusterGroup()
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
virtual std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges)
Vector commit of preprocessed pages.
void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)
Write a preprocessed page to storage. The column must have been added before.
Internal::RNTupleSerializer::RContext fSerializationContext
Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization.
virtual void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length)=0
NTupleSize_t fPrevClusterNEntries
Used to calculate the number of entries in the current cluster.
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)=0
Get a new, empty page for the given column that can be filled with up to nElements.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::uint64_t fNextClusterInGroup
Remembers the starting cluster id for the next cluster group.
std::unique_ptr< RNTupleCompressor > fCompressor
Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
Keeps track of the requested physical column IDs.
std::vector< DescriptorId_t > fIDs
RCluster::ColumnSet_t ToColumnSet() const
void Erase(DescriptorId_t physicalColumnID)
void Insert(DescriptorId_t physicalColumnID)
std::vector< std::size_t > fRefCounters
An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetSharedDescriptorGuar...
RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock)
RNTupleDescriptor * operator->() const
RExclDescriptorGuard(RExclDescriptorGuard &&)=delete
RExclDescriptorGuard & operator=(RExclDescriptorGuard &&)=delete
std::shared_mutex & fLock
RExclDescriptorGuard & operator=(const RExclDescriptorGuard &)=delete
RExclDescriptorGuard(const RExclDescriptorGuard &)=delete
RNTupleDescriptor & fDescriptor
void MoveIn(RNTupleDescriptor &&desc)
An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetExclDescriptorGuard...
const RNTupleDescriptor & GetRef() const
std::shared_mutex & fLock
RSharedDescriptorGuard(RSharedDescriptorGuard &&)=delete
const RNTupleDescriptor * operator->() const
const RNTupleDescriptor & fDescriptor
RSharedDescriptorGuard & operator=(const RSharedDescriptorGuard &)=delete
~RSharedDescriptorGuard()
RSharedDescriptorGuard & operator=(RSharedDescriptorGuard &&)=delete
RSharedDescriptorGuard(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock)
Abstract interface to read data from an ntuple.
virtual std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys)=0
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
const RNTupleReadOptions & GetReadOptions() const
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
void Attach()
Open the physical storage container for the tree.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
std::unique_ptr< unsigned char[]> UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element)
Helper for unstreaming a page.
std::unique_ptr< RCounters > fCounters
virtual void LoadSealedPage(DescriptorId_t physicalColumnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage)=0
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
RNTupleReadOptions fOptions
RActivePhysicalColumns fActivePhysicalColumns
The active columns are implicitly defined by the model fields or views.
NTupleSize_t GetNEntries()
virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)=0
Another version of PopulatePage that allows to specify cluster-relative indexes.
void DropColumn(ColumnHandle_t columnHandle) override
Unregisters a column.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
RPageSource(const RPageSource &)=delete
std::shared_mutex fDescriptorLock
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override
Register a new column.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSource & operator=(RPageSource &&)=delete
virtual RNTupleDescriptor AttachImpl()=0
std::unique_ptr< RNTupleDecompressor > fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
RExclDescriptorGuard GetExclDescriptorGuard()
Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further informatio...
virtual void UnzipClusterImpl(RCluster *)
RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively override the method and provide th...
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
RNTupleMetrics fMetrics
Wraps the I/O counters and is observed by the RNTupleReader metrics.
RPageSource & operator=(const RPageSource &)=delete
void UnzipCluster(RCluster *cluster)
Parallel decompression and unpacking of the pages in the given cluster.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
RNTupleDescriptor fDescriptor
RPageSource(RPageSource &&)=delete
The interface of a task scheduler to schedule page (de)compression tasks.
virtual void Reset()=0
Start a new set of tasks.
virtual ~RTaskScheduler()=default
virtual void Wait()=0
Blocks until all scheduled tasks finished.
virtual void AddTask(const std::function< void(void)> &taskFunc)=0
Take a callable that represents a task.
Common functionality of an ntuple storage for both reading and writing.
const std::string & GetNTupleName() const
Returns the NTuple name.
RPageStorage(const RPageStorage &other)=delete
RPageStorage(RPageStorage &&other)=default
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
std::deque< RSealedPage > SealedPageSequence_t
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
void SetTaskScheduler(RTaskScheduler *taskScheduler)
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
RTaskScheduler * fTaskScheduler
virtual RNTupleMetrics & GetMetrics()=0
Page storage implementations have their own metrics.
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
RPageStorage(std::string_view name)
RPageStorage & operator=(const RPageStorage &other)=delete
A page is a slice of a column that is mapped into memory.
The serialization context is used for the piecewise serialization of a descriptor.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
constexpr DescriptorId_t kInvalidDescriptorId
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
static constexpr double s
Default I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuZip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuWrite
RNTupleAtomicCounter & fNPageCommitted
RNTupleAtomicCounter & fTimeWallWrite
RNTupleAtomicCounter & fTimeWallZip
RNTupleAtomicCounter & fSzWritePayload
RNTupleAtomicCounter & fSzZip
Default I/O performance counters that get registered in fMetrics.
RNTupleAtomicCounter & fTimeWallRead
RNTupleAtomicCounter & fNReadV
RNTupleCalcPerf & fBandwidthReadUncompressed
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuUnzip
RNTupleAtomicCounter & fSzReadPayload
RNTupleAtomicCounter & fSzUnzip
RNTupleAtomicCounter & fNPagePopulated
RNTupleAtomicCounter & fNPageLoaded
RNTupleAtomicCounter & fNClusterLoaded
RNTupleAtomicCounter & fTimeWallUnzip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuRead
RNTupleCalcPerf & fBandwidthUnzip
RNTupleAtomicCounter & fNRead
RNTupleAtomicCounter & fSzReadOverhead
RNTupleCalcPerf & fCompressionRatio
RNTupleCalcPerf & fBandwidthReadCompressed
RNTupleCalcPerf & fFractionReadOverhead
DescriptorId_t fPhysicalId
A range of sealed pages referring to the same column that can be used for vector commit.
RSealedPageGroup(DescriptorId_t d, SealedPageSequence_t::const_iterator b, SealedPageSequence_t::const_iterator e)
SealedPageSequence_t::const_iterator fLast
SealedPageSequence_t::const_iterator fFirst
DescriptorId_t fPhysicalColumnId
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RSealedPage(RSealedPage &&other)=default
RSealedPage & operator=(const RSealedPage &other)=delete
RSealedPage(const void *b, std::uint32_t s, std::uint32_t n)
RSealedPage(const RSealedPage &other)=delete
Generic information about the physical location of data.