52namespace Experimental {
64 std::unique_ptr<ROOT::Experimental::RFieldBase>
CloneImpl(std::string_view )
const final
66 return std::make_unique<RRDFCardinalityField>();
/// Initializes the value storage at `where`: the pointer is reinterpreted as a
/// std::size_t* and the pointee is set to 0. No allocation happens here; the
/// caller supplies the memory.
68 void ConstructValue(
void *where)
const final { *
static_cast<std::size_t *
>(where) = 0; }
/// Returns the fixed type-name string "std::size_t".
71 static std::string
TypeName() {
return "std::size_t"; }
85 return representations;
/// Not meant to be reached: the body is a single assert(false) whose message
/// states that cardinality fields must only be used for reading. When
/// assertions are compiled out (NDEBUG) the call is a no-op.
88 void GenerateColumnsImpl() final { assert(
false &&
"Cardinality fields must only be used for reading"); }
93 ROOT::Experimental::Internal::RColumn::Create<ClusterSize_t>(
RColumnModel(onDiskTypes[0]), 0));
105 *
static_cast<std::size_t *
>(to) =
size;
114 *
static_cast<std::size_t *
>(to) =
size;
128 std::unique_ptr<ROOT::Experimental::RFieldBase>
CloneImpl(std::string_view)
const final
/// Not meant to be reached: the body is a single assert(false) whose message
/// states that RArraySizeField fields must only be used for reading. When
/// assertions are compiled out (NDEBUG) the call is a no-op.
132 void GenerateColumnsImpl() final { assert(
false &&
"RArraySizeField fields must only be used for reading"); }
/// Initializes the value storage at `where`: the pointer is reinterpreted as a
/// std::size_t* and the pointee is set to 0. The memory itself is provided by
/// the caller.
152 void ConstructValue(
void *where) const final { *
static_cast<std::size_t *
>(where) = 0; }
/// Reports the alignment of the value type, which is alignof(std::size_t).
154 std::size_t
GetAlignment() const final {
return alignof(std::size_t); }
165 std::unique_ptr<RFieldBase::RValue>
fValue;
191 auto iReal =
fField->begin();
192 for (; iReal !=
fField->end(); ++iProto, ++iReal) {
205 fValue = std::make_unique<RFieldBase::RValue>(
fField->CreateValue());
211 if (
fValue && keepValue) {
225 return fValue->GetPtr<
void>().get();
234 std::vector<RNTupleDS::RFieldInfo> fieldInfos)
272 fieldInfos.emplace_back(fieldId, nRepetitions);
280 if (fieldDesc.GetTypeName().empty()) {
282 auto cardinalityField = std::make_unique<ROOT::Experimental::Internal::RRDFCardinalityField>();
283 cardinalityField->SetOnDiskId(fieldId);
284 fColumnNames.emplace_back(
"R_rdf_sizeof_" + std::string(colName));
285 fColumnTypes.emplace_back(cardinalityField->GetTypeName());
289 AddField(desc, std::string(colName) +
"." +
f.GetFieldName(),
f.GetId(), fieldInfos);
294 AddField(desc, colName,
f.GetId(), fieldInfos);
300 }
else if (nRepetitions > 0) {
303 AddField(desc, colName,
f.GetId(), fieldInfos);
308 auto innerName = colName.empty() ?
f.GetFieldName() : (std::string(colName) +
"." +
f.GetFieldName());
309 AddField(desc, innerName,
f.GetId(), fieldInfos);
315 auto fieldOrException =
RFieldBase::Create(fieldDesc.GetFieldName(), fieldDesc.GetTypeName());
316 if (!fieldOrException)
318 auto valueField = fieldOrException.Unwrap();
319 valueField->SetOnDiskId(fieldId);
320 for (
auto &
f : *valueField) {
321 f.SetOnDiskId(desc.
FindFieldId(
f.GetFieldName(),
f.GetParent()->GetOnDiskId()));
323 std::unique_ptr<RFieldBase> cardinalityField;
325 if (!fieldInfos.empty()) {
326 const auto &info = fieldInfos.back();
327 if (info.fNRepetitions > 0) {
328 cardinalityField = std::make_unique<ROOT::Experimental::Internal::RArraySizeField>(info.fNRepetitions);
330 cardinalityField = std::make_unique<ROOT::Experimental::Internal::RRDFCardinalityField>();
332 cardinalityField->SetOnDiskId(info.fFieldId);
335 for (
auto i = fieldInfos.rbegin(); i != fieldInfos.rend(); ++i) {
336 const auto &fieldInfo = *i;
338 if (fieldInfo.fNRepetitions > 0) {
341 std::make_unique<ROOT::Experimental::RArrayAsRVecField>(
"", std::move(valueField), fieldInfo.fNRepetitions);
344 valueField = std::make_unique<ROOT::Experimental::RRVecField>(
"", std::move(valueField));
347 valueField->SetOnDiskId(fieldInfo.fFieldId);
351 if (i != fieldInfos.rbegin()) {
352 if (fieldInfo.fNRepetitions > 0) {
354 cardinalityField = std::make_unique<ROOT::Experimental::RArrayAsRVecField>(
"", std::move(cardinalityField),
355 fieldInfo.fNRepetitions);
358 cardinalityField = std::make_unique<ROOT::Experimental::RRVecField>(
"", std::move(cardinalityField));
361 cardinalityField->SetOnDiskId(fieldInfo.fFieldId);
365 if (cardinalityField) {
366 fColumnNames.emplace_back(
"R_rdf_sizeof_" + std::string(colName));
367 fColumnTypes.emplace_back(cardinalityField->GetTypeName());
371 fieldInfos.emplace_back(fieldId, nRepetitions);
377RNTupleDS::RNTupleDS(std::unique_ptr<Internal::RPageSource> pageSource) : fPrincipalSource(std::move(pageSource))
383 std::vector<ROOT::Experimental::RNTupleDS::RFieldInfo>());
394 static std::once_flag flag;
395 std::call_once(flag, []() {
396 if (
auto env =
gSystem->
Getenv(
"ROOT_RNTUPLE_CLUSTERBUNCHSIZE"); env !=
nullptr && strlen(env) > 0) {
397 std::string envStr{env};
398 auto envNum{std::stoul(envStr)};
399 envNum = envNum == 0 ? 1 : envNum;
406std::unique_ptr<ROOT::Experimental::Internal::RPageSource>
407CreatePageSource(std::string_view ntupleName, std::string_view fileName)
414 :
RNTupleDS(CreatePageSource(ntupleName, fileName))
419 :
RNTupleDS(
ROOT::Experimental::Internal::RPageSourceFile::CreateFromAnchor(*ntuple))
424 :
RNTupleDS(CreatePageSource(ntupleName, fileNames[0]))
436std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
447 for (
const auto &s : *field) {
451 auto reader = std::make_unique<Internal::RNTupleColumnReader>(
this, field);
468 if (nRemainingFiles == 0)
472 if (nRemainingFiles >=
fNSlots) {
486 auto nEntries = range.
fSource->GetNEntries();
499 unsigned int nSlotsPerFile =
fNSlots / nRemainingFiles;
501 std::unique_ptr<Internal::RPageSource> source;
512 auto nEntries = source->GetNEntries();
517 if (i == (nRemainingFiles - 1))
520 std::vector<std::pair<ULong64_t, ULong64_t>> rangesByCluster;
522 auto descriptorGuard = source->GetSharedDescriptorGuard();
523 auto clusterId = descriptorGuard->FindClusterId(0, 0);
525 const auto &clusterDesc = descriptorGuard->GetClusterDescriptor(clusterId);
526 rangesByCluster.emplace_back(std::make_pair<ULong64_t, ULong64_t>(
527 clusterDesc.GetFirstEntryIndex(), clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries()));
528 clusterId = descriptorGuard->FindNextClusterId(clusterId);
531 const unsigned int nRangesByCluster = rangesByCluster.size();
534 const auto nClustersPerSlot = nRangesByCluster / nSlotsPerFile;
535 const auto remainder = nRangesByCluster % nSlotsPerFile;
536 std::size_t iRange = 0;
537 unsigned int iSlot = 0;
538 const unsigned int N = std::min(nSlotsPerFile, nRangesByCluster);
539 for (; iSlot <
N; ++iSlot) {
540 auto start = rangesByCluster[iRange].first;
541 iRange += nClustersPerSlot +
static_cast<int>(iSlot < remainder);
543 auto end = rangesByCluster[iRange - 1].second;
547 if (iSlot ==
N - 1) {
548 range.
fSource = std::move(source);
550 range.
fSource = source->Clone();
553 range.
fSource->SetEntryRange({start, end - start});
563 std::vector<std::pair<ULong64_t, ULong64_t>> ranges;
576 r->Disconnect(
true );
597 nEntriesPerSource = 0;
601 nEntriesPerSource += end - start;
604 ranges.emplace_back(start, end);
634 r->Disconnect(
true );
664 for (
unsigned int i = 0; i <
fNSlots; ++i) {
666 r->Disconnect(
false );
689 return ROOT::RDataFrame(std::make_unique<ROOT::Experimental::RNTupleDS>(ntupleName, fileName));
695 return ROOT::RDataFrame(std::make_unique<ROOT::Experimental::RNTupleDS>(ntupleName, fileNames));
700 return ROOT::RDataFrame(std::make_unique<ROOT::Experimental::RNTupleDS>(ntuple));
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
R__EXTERN TSystem * gSystem
An artificial field that provides the size of a fixed-size array.
~RArraySizeField() final=default
RArraySizeField(std::size_t arrayLength)
RArraySizeField & operator=(RArraySizeField &&other)=default
std::unique_ptr< ROOT::Experimental::RFieldBase > CloneImpl(std::string_view) const final
Called by Clone(), which additionally copies the on-disk ID.
void ReadGlobalImpl(NTupleSize_t, void *to) final
void ConstructValue(void *where) const final
Constructs value in a given location of size at least GetValueSize(). Called by the base class' Creat...
RArraySizeField(RArraySizeField &&other)=default
std::size_t GetValueSize() const final
The number of bytes taken by a value of the appropriate type.
RArraySizeField(const RArraySizeField &other)=delete
void ReadInClusterImpl(RClusterIndex, void *to) final
std::size_t GetAlignment() const final
As a rule of thumb, the alignment is equal to the size of the type.
void GenerateColumnsImpl(const ROOT::Experimental::RNTupleDescriptor &) final
Implementations in derived classes should create the backing columns corresponding to the field type ...
RArraySizeField & operator=(const RArraySizeField &other)=delete
void GenerateColumnsImpl() final
Implementations in derived classes should create the backing columns corresponding to the field type ...
void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Every RDF column is represented by exactly one RNTuple field.
std::unique_ptr< RFieldBase > fField
The field backing the RDF column.
Long64_t fEntryOffset
For chains, the logical entry and the physical entry in any particular file can be different.
std::unique_ptr< RFieldBase::RValue > fValue
The memory location used to read from fField.
RNTupleColumnReader(RNTupleDS *ds, RFieldBase *protoField)
Long64_t fLastEntry
Last entry number that was read.
RNTupleDS * fDataSource
The data source that owns this column reader.
void Connect(RPageSource &source, Long64_t entryOffset)
Connect the field and its subfields to the page source.
void Disconnect(bool keepValue)
void * GetImpl(Long64_t entry) final
std::shared_ptr< void > fValuePtr
Used to reuse the object created by fValue when reconnecting sources.
RFieldBase * fProtoField
The prototype field from which fField is cloned.
~RNTupleColumnReader()=default
Abstract interface to read data from an ntuple.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
An artificial field that transforms an RNTuple column that contains the offset of collections into co...
std::unique_ptr< ROOT::Experimental::RFieldBase > CloneImpl(std::string_view) const final
Called by Clone(), which additionally copies the on-disk ID.
void ReadInClusterImpl(ROOT::Experimental::RClusterIndex clusterIndex, void *to) final
Get the number of elements of the collection identified by clusterIndex.
RRDFCardinalityField(RRDFCardinalityField &&other)=default
void GenerateColumnsImpl() final
Implementations in derived classes should create the backing columns corresponding to the field type ...
const RColumnRepresentations & GetColumnRepresentations() const final
Implementations in derived classes should return a static RColumnRepresentations object.
static std::string TypeName()
RRDFCardinalityField & operator=(RRDFCardinalityField &&other)=default
size_t GetValueSize() const final
The number of bytes taken by a value of the appropriate type.
size_t GetAlignment() const final
As a rule of thumb, the alignment is equal to the size of the type.
void ConstructValue(void *where) const final
Constructs value in a given location of size at least GetValueSize(). Called by the base class' Creat...
void GenerateColumnsImpl(const RNTupleDescriptor &desc) final
Implementations in derived classes should create the backing columns corresponding to the field type ...
~RRDFCardinalityField()=default
void ReadGlobalImpl(ROOT::Experimental::NTupleSize_t globalIndex, void *to) final
Get the number of elements of the collection identified by globalIndex.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Holds the static meta-data of an RNTuple column.
Some fields have multiple possible column representations, e.g.
A field translates read and write calls from/to underlying columns to/from tree values.
std::string GetFieldName() const
const ColumnRepresentation_t & EnsureCompatibleColumnTypes(const RNTupleDescriptor &desc) const
Returns the on-disk column types found in the provided descriptor for fOnDiskId.
std::vector< std::unique_ptr< Internal::RColumn > > fColumns
The columns are connected either to a sink or to a source (not to both); they are owned by the field.
RConstSchemaIterator cbegin() const
std::unique_ptr< RFieldBase > Clone(std::string_view newName) const
Copies the field and its sub fields using a possibly new name and a new, unconnected set of columns.
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &canonicalType, const std::string &typeAlias, bool fContinueOnError=false)
Factory method to resurrect a field from the stored on-disk type information.
Internal::RColumn * fPrincipalColumn
Points into fColumns.
DescriptorId_t GetOnDiskId() const
std::uint64_t GetNRepetitions() const
The RDataSource implementation for RNTuple.
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
void Initialize() final
Convenience method called before starting an event-loop.
void AddField(const RNTupleDescriptor &desc, std::string_view colName, DescriptorId_t fieldId, std::vector< RFieldInfo > fieldInfos)
Provides the RDF column "colName" given the field identified by fieldID.
std::size_t fNextFileIndex
Index into fFileNames to the next file to process.
std::vector< std::string > fColumnNames
std::unordered_map< ULong64_t, std::size_t > fFirstEntry2RangeIdx
Maps the first entries from the ranges of the last GetEntryRanges() call to their corresponding index...
void Finalize() final
Convenience method called after concluding an event-loop.
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
void InitSlot(unsigned int slot, ULong64_t firstEntry) final
Convenience method called at the start of the data processing associated to a slot.
std::unordered_map< ROOT::Experimental::DescriptorId_t, std::string > fFieldId2QualifiedName
Connects the IDs of active proto fields and their subfields to their fully qualified name (a....
void FinalizeSlot(unsigned int slot) final
Convenience method called at the end of the data processing associated to a slot.
std::vector< std::vector< Internal::RNTupleColumnReader * > > fActiveColumnReaders
List of column readers returned by GetColumnReaders() organized by slot.
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::unique_ptr< Internal::RPageSource > fPrincipalSource
The first source is used to extract the schema and build the prototype fields.
std::vector< REntryRangeDS > fCurrentRanges
Basis for the ranges returned by the last GetEntryRanges() call.
std::unique_ptr< RNTupleDescriptor > fPrincipalDescriptor
A clone of the first pages source's descriptor.
std::vector< REntryRangeDS > fNextRanges
Basis for the ranges populated by the PrepareNextRanges() call.
std::vector< std::unique_ptr< ROOT::Experimental::RFieldBase > > fProtoFields
We prepare a prototype field for every column.
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
void PrepareNextRanges()
Populates fNextRanges with the next set of entry ranges.
std::vector< std::string > fFileNames
std::string fNTupleName
The data source may be constructed with an ntuple name and a list of files.
std::vector< std::string > fColumnTypes
RNTupleDS(std::unique_ptr< ROOT::Experimental::Internal::RPageSource > pageSource)
ULong64_t fSeenEntries
The number of entries so far returned by GetEntryRanges()
The on-storage meta-data of an ntuple.
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
Common user-tunable settings for reading ntuples.
void SetClusterBunchSize(unsigned int val)
Representation of an RNTuple data set in a ROOT file.
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
virtual const char * Getenv(const char *env)
Get environment variable.
void CallConnectPageSourceOnField(RFieldBase &, RPageSource &)
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
RDataFrame FromRNTuple(std::string_view ntupleName, std::string_view fileName)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.
The PrepareNextRanges() method populates the fNextRanges list with REntryRangeDS records.
std::unique_ptr< ROOT::Experimental::Internal::RPageSource > fSource
ULong64_t fLastEntry
End entry index in fSource, e.g. the number of entries in the range is fLastEntry - fFirstEntry.
ULong64_t fFirstEntry
First entry index in fSource.