61   std::unique_ptr<ROOT::RFieldBase> 
CloneImpl(std::string_view ) 
const final 
   63      return std::make_unique<RRDFCardinalityField>();
 
 
  101      *
static_cast<std::size_t *
>(to) = 
size;
 
 
  110      *
static_cast<std::size_t *
>(to) = 
size;
 
 
 
  124   std::unique_ptr<ROOT::RFieldBase> 
CloneImpl(std::string_view) 
const final 
 
 
  167   std::unique_ptr<RFieldBase::RValue> 
fValue; 
 
  207         auto onDiskType = 
source.GetSharedDescriptorGuard()->GetFieldDescriptor(
fField->GetOnDiskId()).GetTypeName();
 
  208         std::string 
msg = 
"RNTupleDS: invalid type \"" + 
fField->GetTypeName() + 
"\" for column \"" +
 
  211         throw std::runtime_error(
msg);
 
  221         fValue = std::make_unique<RFieldBase::RValue>(
fField->CreateValue());
 
 
  241      return fValue->GetPtr<
void>().get();
 
 
 
  297                           fieldDesc.GetTypeName().substr(0, 12) == 
"std::vector<" || 
fieldDesc.GetTypeName() == 
"");
 
  327      f.SetOnDiskId(desc.
FindFieldId(
f.GetFieldName(), 
f.GetParent()->GetOnDiskId()));
 
  333      if (
info.fNRepetitions > 0) {
 
  336         cardinalityField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField>();
 
 
  391   fPrincipalDescriptor = 
pageSource->GetSharedDescriptorGuard()->Clone();
 
  392   fStagingArea.emplace_back(std::move(
pageSource));
 
  394   AddField(fPrincipalDescriptor, 
"", fPrincipalDescriptor.GetFieldZeroId(),
 
  395            std::vector<ROOT::RDF::RNTupleDS::RFieldInfo>());
 
 
  406   static std::once_flag 
flag;
 
  407   std::call_once(
flag, []() {
 
  418std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource(std::string_view 
ntupleName, std::string_view fileName)
 
  427   fFileNames = std::vector<std::string>{std::string{fileName}};
 
 
  439                                const std::pair<ULong64_t, ULong64_t> &
range)
 
 
  456      std::distance(fColumnNames.begin(), std::find(fColumnNames.begin(), fColumnNames.end(), 
fieldName));
 
  461      return fProtoFields[
index].get();
 
  476                                               return fld->GetTypeName() == requestedType;
 
  485         throw std::runtime_error(
"RNTupleDS: Could not create field with type \"" + 
requestedType +
 
  486                                  "\" for column \"" + std::string(
fieldName) + 
"\"");
 
  496   return fProtoFields[
index].get();
 
 
  499std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
 
  507   fFieldId2QualifiedName[
field->GetOnDiskId()] = fPrincipalDescriptor.GetQualifiedFieldName(
field->GetOnDiskId());
 
  508   for (
const auto &s : *
field) {
 
  509      fFieldId2QualifiedName[s.GetOnDiskId()] = fPrincipalDescriptor.GetQualifiedFieldName(s.GetOnDiskId());
 
  512   auto reader = std::make_unique<ROOT::Internal::RDF::RNTupleColumnReader>(
this, 
field);
 
  513   fActiveColumnReaders[
slot].emplace_back(
reader.get());
 
 
  521      std::unique_lock lock(fMutexStaging);
 
  522      fCvStaging.wait(lock, [
this] { 
return fIsReadyForStaging || fStagingThreadShouldTerminate; });
 
  523      if (fStagingThreadShouldTerminate)
 
  528      fHasNextSources = 
true;
 
  529      fIsReadyForStaging = 
false;
 
  532      fCvStaging.notify_one();
 
 
  538   const auto nFiles = fFileNames.empty() ? 1 : fFileNames.size();
 
  540   for (
auto i = fNextFileIndex; (i < 
nFiles) && ((i - fNextFileIndex) < fNSlots); ++i) {
 
  542      if (fStagingThreadShouldTerminate)
 
  545      if (fStagingArea[i]) {
 
  549         fStagingArea[i] = CreatePageSource(fNTupleName, fFileNames[i]);
 
  550         fStagingArea[i]->LoadStructure();
 
 
  557   assert(fNextRanges.empty());
 
  559   auto nFiles = fFileNames.empty() ? 1 : fFileNames.size();
 
  568      while ((fNextRanges.size() < fNSlots) && (fNextFileIndex < 
nFiles)) {
 
  571         std::swap(fStagingArea[fNextFileIndex], 
range.fSource);
 
  573         if (!
range.fSource) {
 
  576            range.fSource = CreatePageSource(fNTupleName, fFileNames[fNextFileIndex]);
 
  578         range.fFileName = fFileNames[fNextFileIndex];
 
  579         range.fSource->Attach();
 
  586         fNextRanges.emplace_back(std::move(
range));
 
  595   for (std::size_t i = 0; (fNextRanges.size() < fNSlots) && (fNextFileIndex < 
nFiles); ++i) {
 
  596      std::unique_ptr<ROOT::Internal::RPageSource> 
source;
 
  599      std::swap(fStagingArea[fNextFileIndex], 
source);
 
  602         source = CreatePageSource(fNTupleName, fFileNames[fNextFileIndex]);
 
  627      unsigned int iSlot = 0;
 
  643         range.fSource->SetEntryRange({start, end - start});
 
  644         range.fFirstEntry = start;
 
  645         range.fLastEntry = end;
 
  646         fNextRanges.emplace_back(std::move(
range));
 
 
  653   std::vector<std::pair<ULong64_t, ULong64_t>> ranges;
 
  662      for (
auto r : fActiveColumnReaders[0]) {
 
  663         r->Disconnect(
true );
 
  669   if (fCurrentRanges.empty() || fSeenEntriesNoGlobalRange > 0) {
 
  673         std::unique_lock lock(fMutexStaging);
 
  674         fCvStaging.wait(lock, [
this] { 
return fHasNextSources; });
 
  677      if (fNextRanges.empty()) {
 
  682      assert(fNextRanges.size() <= fNSlots);
 
  684      fCurrentRanges.clear();
 
  685      std::swap(fCurrentRanges, fNextRanges);
 
  690      std::lock_guard 
_(fMutexStaging);
 
  691      fIsReadyForStaging = 
true;
 
  692      fHasNextSources = 
false;
 
  694   fCvStaging.notify_one();
 
  701   fFirstEntry2RangeIdx.clear();
 
  702   fOriginalRanges.clear();
 
  706   for (std::size_t i = 0; i < fCurrentRanges.size(); ++i) {
 
  710      if (fCurrentRanges[i].fFirstEntry == 0) {
 
  716      auto start = fCurrentRanges[i].fFirstEntry + fSeenEntriesNoGlobalRange;
 
  717      auto end = fCurrentRanges[i].fLastEntry + fSeenEntriesNoGlobalRange;
 
  721      if (fGlobalEntryRange.has_value()) {
 
  739         if (fGlobalEntryRange->first >= start && fGlobalEntryRange->second <= end) {
 
  740            fOriginalRanges.emplace_back(start, end);
 
  741            fFirstEntry2RangeIdx[fGlobalEntryRange->first] = i;
 
  742            ranges.emplace_back(fGlobalEntryRange->first, fGlobalEntryRange->second);
 
  747         else if (fGlobalEntryRange->second > end && fGlobalEntryRange->first < end) {
 
  748            fOriginalRanges.emplace_back(start, end);
 
  749            fFirstEntry2RangeIdx[fGlobalEntryRange->first] = i;
 
  750            ranges.emplace_back(fGlobalEntryRange->first, end);
 
  751            std::optional<std::pair<ULong64_t, ULong64_t>> 
newvalues({end, fGlobalEntryRange->second});
 
  755         else if (fGlobalEntryRange->second < start) {
 
  760         else if (fGlobalEntryRange->first >= end) {
 
  761            fOriginalRanges.emplace_back(start, end);
 
  762            fFirstEntry2RangeIdx[start] = i;
 
  763            ranges.emplace_back(start, start);
 
  768         fFirstEntry2RangeIdx[start] = i;
 
  769         fOriginalRanges.emplace_back(start, end);
 
  770         ranges.emplace_back(start, end);
 
  776   if ((fNSlots == 1) && (fCurrentRanges[0].fSource)) {
 
  777      for (
auto r : fActiveColumnReaders[0]) {
 
  778         r->Connect(*fCurrentRanges[0].fSource, fOriginalRanges[0].first);
 
 
  789      fSlotsToRangeIdxs[0] = 0;
 
  800   fSlotsToRangeIdxs[
slot * ROOT::Internal::RDF::CacheLineStep<std::size_t>()] = 
idxRange;
 
  802   for (
auto r : fActiveColumnReaders[
slot]) {
 
  803      r->Connect(*fCurrentRanges[
idxRange].fSource,
 
 
  813   for (
auto r : fActiveColumnReaders[
slot]) {
 
  814      r->Disconnect(
true );
 
 
  823      auto msg = std::string(
"RNTupleDS: There is no column with name \"") + std::string(
colName) + 
"\"";
 
  824      throw std::runtime_error(
msg);
 
  828   return fColumnTypes[
index];
 
 
  833   return std::find(fColumnNames.begin(), fColumnNames.end(), 
colName) != fColumnNames.end();
 
 
  838   fSeenEntriesNoGlobalRange = 0;
 
  840   fIsReadyForStaging = fHasNextSources = fStagingThreadShouldTerminate = 
false;
 
  841   fThreadStaging = std::thread(&RNTupleDS::ExecStaging, 
this);
 
  842   assert(fNextRanges.empty());
 
  844   if (fCurrentRanges.empty() || (fFileNames.size() > fNSlots)) {
 
  847         std::lock_guard 
_(fMutexStaging);
 
  848         fIsReadyForStaging = 
true;
 
  850      fCvStaging.notify_one();
 
  854      fNextFileIndex = std::max(fFileNames.size(), std::size_t(1));
 
 
  860   for (
unsigned int i = 0; i < fNSlots; ++i) {
 
  861      for (
auto r : fActiveColumnReaders[i]) {
 
  862         r->Disconnect(
false );
 
  866      std::lock_guard 
_(fMutexStaging);
 
  867      fStagingThreadShouldTerminate = 
true;
 
  869   fCvStaging.notify_one();
 
  870   fThreadStaging.join();
 
  873   if (fFileNames.size() > fNSlots) {
 
  874      fCurrentRanges.clear();
 
  876      fStagingArea.clear();
 
  877      fStagingArea.resize(fFileNames.size());
 
 
  886   fActiveColumnReaders.resize(fNSlots);
 
  887   fSlotsToRangeIdxs.resize(fNSlots * ROOT::Internal::RDF::CacheLineStep<std::size_t>());
 
 
  901   unsigned int slot, 
const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &
sampleMap)
 const 
  908   const auto &
rangeIdx = fSlotsToRangeIdxs.at(
slot * ROOT::Internal::RDF::CacheLineStep<std::size_t>());
 
  911   if (!fCurrentRanges[
rangeIdx].fSource)
 
  923      throw std::runtime_error(
"Full sample identifier '" + 
ntupleID + 
"' cannot be found in the available samples.");
 
 
  932                                                  const std::pair<ULong64_t, ULong64_t> &
range)
 
 
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
 
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
 
long long Long64_t
Portable signed long integer 8 bytes.
 
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
 
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
 
R__EXTERN TSystem * gSystem
 
void GetCollectionInfo(const ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *collectionStart, ROOT::NTupleSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
 
An artificial field that provides the size of a fixed-size array.
 
void GenerateColumns(const ROOT::RNTupleDescriptor &) final
Implementations in derived classes should create the backing columns corresponding to the field type ...
 
std::unique_ptr< ROOT::RFieldBase > CloneImpl(std::string_view) const final
Called by Clone(), which additionally copies the on-disk ID.
 
void ReadGlobalImpl(ROOT::NTupleSize_t, void *to) final
 
RArraySizeField(const RArraySizeField &other)=delete
 
RArraySizeField(RArraySizeField &&other)=default
 
RArraySizeField & operator=(RArraySizeField &&other)=default
 
void ReadInClusterImpl(RNTupleLocalIndex, void *to) final
 
void ReconcileOnDiskField(const RNTupleDescriptor &) final
For non-artificial fields, check compatibility of the in-memory field and the on-disk field.
 
std::size_t GetValueSize() const final
The number of bytes taken by a value of the appropriate type.
 
RArraySizeField(std::size_t arrayLength)
 
void ConstructValue(void *where) const final
Constructs value in a given location of size at least GetValueSize(). Called by the base class' Creat...
 
~RArraySizeField() final=default
 
RArraySizeField & operator=(const RArraySizeField &other)=delete
 
std::size_t GetAlignment() const final
As a rule of thumb, the alignment is equal to the size of the type.
 
void GenerateColumns() final
Implementations in derived classes should create the backing columns corresponding to the field type ...
 
Every RDF column is represented by exactly one RNTuple field.
 
void * GetImpl(Long64_t entry) final
 
void Connect(RPageSource &source, Long64_t entryOffset)
Connect the field and its subfields to the page source.
 
RNTupleColumnReader(RNTupleDS *ds, RFieldBase *protoField)
 
std::unique_ptr< RFieldBase::RValue > fValue
The memory location used to read from fField.
 
std::unique_ptr< RFieldBase > fField
The field backing the RDF column.
 
Long64_t fEntryOffset
For chains, the logical entry and the physical entry in any particular file can be different.
 
std::shared_ptr< void > fValuePtr
Used to reuse the object created by fValue when reconnecting sources.
 
RNTupleDS * fDataSource
The data source that owns this column reader.
 
~RNTupleColumnReader() override=default
 
RFieldBase * fProtoField
The prototype field from which fField is cloned.
 
Long64_t fLastEntry
Last entry number that was read.
 
void Disconnect(bool keepValue)
 
An artificial field that transforms an RNTuple column that contains the offset of collections into co...
 
RRDFCardinalityField(RRDFCardinalityField &&other)=default
 
size_t GetAlignment() const final
As a rule of thumb, the alignment is equal to the size of the type.
 
void GenerateColumns() final
Implementations in derived classes should create the backing columns corresponding to the field type ...
 
void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final
Get the number of elements of the collection identified by globalIndex.
 
void ReconcileOnDiskField(const RNTupleDescriptor &) final
For non-artificial fields, check compatibility of the in-memory field and the on-disk field.
 
RRDFCardinalityField & operator=(RRDFCardinalityField &&other)=default
 
void GenerateColumns(const ROOT::RNTupleDescriptor &desc) final
Implementations in derived classes should create the backing columns corresponding to the field type ...
 
~RRDFCardinalityField() override=default
 
const RColumnRepresentations & GetColumnRepresentations() const final
Implementations in derived classes should return a static RColumnRepresentations object.
 
size_t GetValueSize() const final
The number of bytes taken by a value of the appropriate type.
 
void ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to) final
Get the number of elements of the collection identified by clusterIndex.
 
std::unique_ptr< ROOT::RFieldBase > CloneImpl(std::string_view) const final
Called by Clone(), which additionally copies the on-disk ID.
 
void ConstructValue(void *where) const final
Constructs value in a given location of size at least GetValueSize(). Called by the base class' Creat...
 
static void SetClusterBunchSize(RNTupleReadOptions &options, unsigned int val)
 
Abstract interface to read data from an ntuple.
 
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
 
std::vector< void * > Record_t
 
std::optional< std::pair< ULong64_t, ULong64_t > > fGlobalEntryRange
 
The RDataSource implementation for RNTuple.
 
void AddField(const ROOT::RNTupleDescriptor &desc, std::string_view colName, ROOT::DescriptorId_t fieldId, std::vector< RFieldInfo > fieldInfos, bool convertToRVec=true)
Provides the RDF column "colName" given the field identified by fieldID.
 
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
 
void ExecStaging()
The main function of the fThreadStaging background thread.
 
std::vector< std::unique_ptr< ROOT::Internal::RPageSource > > fStagingArea
The staging area is relevant for chains of files, i.e.
 
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
 
std::vector< std::unique_ptr< ROOT::RFieldBase > > fProtoFields
We prepare a prototype field for every column.
 
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
 
ROOT::RFieldBase * GetFieldWithTypeChecks(std::string_view fieldName, const std::type_info &tid)
 
std::vector< std::string > fFileNames
 
void InitSlot(unsigned int slot, ULong64_t firstEntry) final
Convenience method called at the start of the data processing associated to a slot.
 
RNTupleDS(std::unique_ptr< ROOT::Internal::RPageSource > pageSource)
 
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
 
std::unordered_map< ROOT::DescriptorId_t, std::string > fFieldId2QualifiedName
Connects the IDs of active proto fields and their subfields to their fully qualified name (a....
 
std::string fNTupleName
The data source may be constructed with an ntuple name and a list of files.
 
void PrepareNextRanges()
Populates fNextRanges with the next set of entry ranges.
 
void StageNextSources()
Starting from fNextFileIndex, opens the next fNSlots files.
 
void Finalize() final
Convenience method called after concluding an event-loop.
 
std::vector< std::string > fColumnTypes
 
void Initialize() final
Convenience method called before starting an event-loop.
 
std::vector< std::string > fColumnNames
 
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
 
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
 
void FinalizeSlot(unsigned int slot) final
Convenience method called at the end of the data processing associated to a slot.
 
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
 
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree,...
 
Base class for all ROOT issued exceptions.
 
The list of column representations a field can have.
 
A field translates read and write calls from/to underlying columns to/from tree values.
 
ROOT::Internal::RColumn * fPrincipalColumn
All fields that have columns have a distinct main column.
 
RConstSchemaIterator cbegin() const
 
const std::string & GetFieldName() const
 
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
 
ROOT::DescriptorId_t GetOnDiskId() const
 
std::unique_ptr< RFieldBase > Clone(std::string_view newName) const
Copies the field and its subfields using a possibly new name and a new, unconnected set of columns.
 
The container field for an ntuple model, which itself has no physical representation.
 
The on-storage metadata of an RNTuple.
 
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
 
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
 
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
 
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
 
Common user-tunable settings for reading RNTuples.
 
const_iterator begin() const
 
const_iterator end() const
 
virtual const char * Getenv(const char *env)
Get environment variable.
 
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
 
ROOT::RDataFrame FromRNTuple(std::string_view ntupleName, const std::vector< std::string > &fileNames, const std::pair< ULong64_t, ULong64_t > &range)
Internal overload of the function that allows passing a range of entries.
 
std::pair< std::vector< ROOT::Internal::RNTupleClusterBoundaries >, ROOT::NTupleSize_t > GetClustersAndEntries(std::string_view ntupleName, std::string_view location)
Retrieves the cluster boundaries and the number of entries for the input RNTuple.
 
void SetAllowFieldSubstitutions(RFieldZero &fieldZero, bool val)
 
void CallConnectPageSourceOnField(RFieldBase &, ROOT::Internal::RPageSource &)
 
std::vector< ROOT::Internal::RNTupleClusterBoundaries > GetClusterBoundaries(const RNTupleDescriptor &desc)
Return the cluster boundaries for each cluster in this RNTuple.
 
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert the std:: prefix.
 
RDataFrame FromRNTuple(std::string_view ntupleName, std::string_view fileName)
 
std::vector< std::string > ColumnNames_t
 
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
 
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
 
ENTupleStructure
The fields in the RNTuple data model tree can carry different structural information about the type s...
 
Tag to let data sources use the native data type when creating a column reader.
 
The PrepareNextRanges() method populates the fNextRanges list with REntryRangeDS records.