25#include <condition_variable>
32#include <unordered_map>
37namespace Experimental {
41class RNTupleColumnReader;
52 std::unique_ptr<ROOT::Experimental::Internal::RPageSource>
fSource;
80 std::vector<std::unique_ptr<ROOT::Experimental::Internal::RPageSource>>
fStagingArea;
87 std::vector<std::unique_ptr<ROOT::Experimental::RFieldBase>>
fProtoFields;
92 std::unordered_map<std::size_t, std::vector<std::unique_ptr<ROOT::Experimental::RFieldBase>>>
208namespace Experimental {
unsigned long long ULong64_t
Every RDF column is represented by exactly one RNTuple field.
The RDataSource implementation for RNTuple.
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
RNTupleDS(const RNTupleDS &)=delete
void ExecStaging()
The main function of the fThreadStaging background thread.
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
std::thread fThreadStaging
The background thread that runs StageNextSources()
void Initialize() final
Convenience method called before starting an event-loop.
RNTupleDS & operator=(const RNTupleDS &)=delete
std::size_t fNextFileIndex
Index into fFileNames to the next file to process.
std::vector< std::string > fColumnNames
std::unordered_map< ULong64_t, std::size_t > fFirstEntry2RangeIdx
Maps the first entries from the ranges of the last GetEntryRanges() call to their corresponding index in fCurrentRanges.
void Finalize() final
Convenience method called after concluding an event-loop.
RNTupleDescriptor fPrincipalDescriptor
A clone of the first pages source's descriptor.
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
void InitSlot(unsigned int slot, ULong64_t firstEntry) final
Convenience method called at the start of the data processing associated to a slot.
RNTupleDS(RNTupleDS &&)=delete
void FinalizeSlot(unsigned int slot) final
Convenience method called at the end of the data processing associated to a slot.
std::vector< std::vector< Internal::RNTupleColumnReader * > > fActiveColumnReaders
List of column readers returned by GetColumnReaders() organized by slot.
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::vector< REntryRangeDS > fCurrentRanges
Basis for the ranges returned by the last GetEntryRanges() call.
bool fStagingThreadShouldTerminate
Is true when the I/O thread should quit.
void AddField(const RNTupleDescriptor &desc, std::string_view colName, ROOT::DescriptorId_t fieldId, std::vector< RFieldInfo > fieldInfos, bool convertToRVec=true)
Provides the RDF column "colName" given the field identified by fieldID.
bool fHasNextSources
Is true when the staging thread has populated the next batch of files to fStagingArea.
std::vector< REntryRangeDS > fNextRanges
Basis for the ranges populated by the PrepareNextRanges() call.
std::vector< std::unique_ptr< ROOT::Experimental::RFieldBase > > fProtoFields
We prepare a prototype field for every column.
std::mutex fMutexStaging
Protects the shared state between the main thread and the I/O thread.
void StageNextSources()
Starting from fNextFileIndex, opens the next fNSlots files.
std::size_t GetNFiles() const final
Returns the number of files from which the dataset is constructed.
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
std::vector< std::unique_ptr< ROOT::Experimental::Internal::RPageSource > > fStagingArea
The staging area is relevant for chains of files, i.e.
void PrepareNextRanges()
Populates fNextRanges with the next set of entry ranges.
std::unordered_map< std::size_t, std::vector< std::unique_ptr< ROOT::Experimental::RFieldBase > > > fAlternativeProtoFields
Columns may be requested with types other than with which they were initially added as proto fields.
std::vector< std::string > fFileNames
std::string fNTupleName
The data source may be constructed with an ntuple name and a list of files.
std::vector< std::string > fColumnTypes
RNTupleDS & operator=(RNTupleDS &&)=delete
RNTupleDS(std::unique_ptr< ROOT::Experimental::Internal::RPageSource > pageSource)
std::string GetLabel() final
Return a string representation of the datasource type.
bool SetEntry(unsigned int, ULong64_t) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::unordered_map< ROOT::DescriptorId_t, std::string > fFieldId2QualifiedName
Connects the IDs of active proto fields and their subfields to their fully qualified name (a.b.c.d).
ULong64_t fSeenEntries
The number of entries so far returned by GetEntryRanges()
std::condition_variable fCvStaging
Signal for the state information of fIsReadyForStaging and fHasNextSources.
bool fIsReadyForStaging
Is true when the staging thread should start working.
The on-storage meta-data of an ntuple.
Pure virtual base class for all column reader types.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
Representation of an RNTuple data set in a ROOT file.
RDataFrame FromRNTuple(std::string_view ntupleName, std::string_view fileName)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow forward declaring tbb::task_arena without forward declaring tbb::interface7.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
The PrepareNextRanges() method populates the fNextRanges list with REntryRangeDS records.
std::unique_ptr< ROOT::Experimental::Internal::RPageSource > fSource
ULong64_t fLastEntry
End entry index in fSource, i.e. the number of entries in the range is fLastEntry - fFirstEntry.
ULong64_t fFirstEntry
First entry index in fSource.
Holds useful information about fields added to the RNTupleDS.
std::size_t fNRepetitions
RFieldInfo(ROOT::DescriptorId_t fieldId, std::size_t nRepetitions)
ROOT::DescriptorId_t fFieldId