Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDS.cxx
Go to the documentation of this file.
1/// \file RNTupleDS.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Enrico Guiraud <enrico.guiraud@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
#include <ROOT/RField.hxx>
#include <ROOT/RFieldValue.hxx>
#include <ROOT/RNTupleDS.hxx>
#include <ROOT/RNTupleUtil.hxx>
#include <ROOT/RPageStorage.hxx>
#include <ROOT/RStringView.hxx>

#include <TError.h>

#include <algorithm>
#include <iterator>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <utility>
#include <vector>
32
33namespace ROOT {
34namespace Experimental {
35namespace Detail {
36class RNTupleColumnReader final : public ROOT::Detail::RDF::RColumnReaderBase {
40
41 std::unique_ptr<RFieldBase> fField;
42 RFieldValue fValue;
43 Long64_t fLastEntry; ///< Last entry number that was read
44
45 std::unique_ptr<RFieldBase> MakeField(const std::string &colName, RPageSource &source)
46 {
47 const auto &descriptor = source.GetDescriptor();
48 const auto fieldId = descriptor.FindFieldId(colName);
49 const auto &fieldDescriptor = descriptor.GetFieldDescriptor(fieldId);
50 const auto typeName = fieldDescriptor.GetTypeName();
51 auto fieldBasePtr = Detail::RFieldBase::Create(fieldDescriptor.GetFieldName(), typeName).Unwrap();
52 Detail::RFieldFuse::ConnectRecursively(fieldId, source, *fieldBasePtr);
53 return fieldBasePtr;
54 }
55
56public:
57 RNTupleColumnReader(const std::string &colName, RPageSource &source)
58 : fField(MakeField(colName, source)), fValue(fField->GenerateValue()), fLastEntry(-1)
59 {
60 }
61
62 void *GetImpl(Long64_t entry) final
63 {
64 if (entry != fLastEntry) {
65 fField->Read(entry, &fValue);
66 fLastEntry = entry;
67 }
68 return fValue.GetRawPtr();
69 }
70};
71} // namespace Detail
72
74{
75 for (const auto& f : desc.GetFieldRange(parentId)) {
76 fColumnNames.emplace_back(desc.GetQualifiedFieldName(f.GetId()));
77 fColumnTypes.emplace_back(f.GetTypeName());
78 if (f.GetStructure() == ENTupleStructure::kRecord)
79 AddFields(desc, f.GetId());
80 }
81}
82
83
84RNTupleDS::RNTupleDS(std::unique_ptr<Detail::RPageSource> pageSource)
85{
86 pageSource->Attach();
87 const auto &descriptor = pageSource->GetDescriptor();
88
89 AddFields(descriptor, descriptor.GetFieldZeroId());
90
91 fSources.emplace_back(std::move(pageSource));
92}
93
94RDF::RDataSource::Record_t RNTupleDS::GetColumnReadersImpl(std::string_view /* name */, const std::type_info & /* ti */)
95{
96 // This datasource uses the GetColumnReaders2 API instead (better name in the works)
97 return {};
98}
99
100std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
101RNTupleDS::GetColumnReaders(unsigned int slot, std::string_view name, const std::type_info & /*tid*/)
102{
103 return std::make_unique<ROOT::Experimental::Detail::RNTupleColumnReader>(std::string(name), *fSources[slot]);
104}
105
107{
108 return true;
109}
110
111std::vector<std::pair<ULong64_t, ULong64_t>> RNTupleDS::GetEntryRanges()
112{
113 // TODO(jblomer): use cluster boundaries for the entry ranges
114 std::vector<std::pair<ULong64_t, ULong64_t>> ranges;
115 if (fHasSeenAllRanges) return ranges;
116
117 auto nEntries = fSources[0]->GetNEntries();
118 const auto chunkSize = nEntries / fNSlots;
119 const auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
120 auto start = 0UL;
121 auto end = 0UL;
122 for (auto i : ROOT::TSeqU(fNSlots)) {
123 start = end;
124 end += chunkSize;
125 ranges.emplace_back(start, end);
126 (void)i;
127 }
128 ranges.back().second += reminder;
129 fHasSeenAllRanges = true;
130 return ranges;
131}
132
133
134std::string RNTupleDS::GetTypeName(std::string_view colName) const
135{
136 const auto index = std::distance(
137 fColumnNames.begin(), std::find(fColumnNames.begin(), fColumnNames.end(), colName));
138 return fColumnTypes[index];
139}
140
141
142bool RNTupleDS::HasColumn(std::string_view colName) const
143{
144 return std::find(fColumnNames.begin(), fColumnNames.end(), colName) !=
145 fColumnNames.end();
146}
147
148
150{
151 fHasSeenAllRanges = false;
152}
153
154
156{
157}
158
159
160void RNTupleDS::SetNSlots(unsigned int nSlots)
161{
162 R__ASSERT(fNSlots == 0);
163 R__ASSERT(nSlots > 0);
164 fNSlots = nSlots;
165
166 for (unsigned int i = 1; i < fNSlots; ++i) {
167 fSources.emplace_back(fSources[0]->Clone());
168 R__ASSERT(i == (fSources.size() - 1));
169 fSources[i]->Attach();
170 }
171}
172} // ns Experimental
173} // ns ROOT
174
175
176ROOT::RDataFrame ROOT::Experimental::MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName)
177{
178 auto pageSource = ROOT::Experimental::Detail::RPageSource::Create(ntupleName, fileName);
179 ROOT::RDataFrame rdf(std::make_unique<RNTupleDS>(std::move(pageSource)));
180 return rdf;
181}
#define f(i)
Definition RSha256.hxx:104
long long Long64_t
Definition RtypesCore.h:73
unsigned long long ULong64_t
Definition RtypesCore.h:74
#define R__ASSERT(e)
Definition TError.h:120
char name[80]
Definition TGX11.cxx:110
typedef void((*Func_t)())
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName)
Factory method to resurrect a field from the stored on-disk type information.
Definition RField.cxx:156
static void ConnectRecursively(DescriptorId_t fieldId, RPageSource &pageSource, RFieldBase &field)
Connect the field columns and all sub field columns.
Definition RField.cxx:127
Abstract interface to read data from an ntuple.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
A field translates read and write calls from/to underlying columns to/from tree values.
Represents transient storage of simple or complex C++ values.
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::vector< std::unique_ptr< ROOT::Experimental::Detail::RPageSource > > fSources
Clones of the first source, one for each slot.
Definition RNTupleDS.hxx:44
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
Definition RNTupleDS.cxx:94
std::vector< std::string > fColumnNames
Definition RNTupleDS.hxx:46
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
RNTupleDS(std::unique_ptr< ROOT::Experimental::Detail::RPageSource > pageSource)
Definition RNTupleDS.cxx:84
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
void Finalise() final
Convenience method called after concluding an event-loop.
std::vector< std::string > fColumnTypes
Definition RNTupleDS.hxx:47
void AddFields(const RNTupleDescriptor &desc, DescriptorId_t parentId)
Definition RNTupleDS.cxx:73
void Initialise() final
Convenience method called before starting an event-loop.
The on-storage meta-data of an ntuple.
RFieldDescriptorRange GetFieldRange(const RFieldDescriptor &fieldDesc) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
std::vector< void * > Record_t
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
RDataFrame MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName)
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
TSeq< unsigned int > TSeqU
Definition TSeq.hxx:195