Logo ROOT  
Reference Guide
RNTupleDS.cxx
Go to the documentation of this file.
1/// \file RNTupleDS.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RNTuple.hxx>
17#include <ROOT/RNTupleDS.hxx>
18#include <ROOT/RStringView.hxx>
19
20#include <TError.h>
21
22#include <string>
23#include <vector>
24#include <typeinfo>
25#include <utility>
26
27namespace ROOT {
28namespace Experimental {
29
30ROOT::Experimental::RNTupleDS::RNTupleDS(std::unique_ptr<ROOT::Experimental::RNTupleReader> ntuple)
31{
32 fReaders.emplace_back(std::move(ntuple));
33 auto rootField = fReaders[0]->GetModel()->GetRootField();
34 for (auto &f : *rootField) {
35 if (f.GetParent() != rootField)
36 continue;
37 fColumnNames.push_back(f.GetName());
38 fColumnTypes.push_back(f.GetType());
39 }
40}
41
42const std::vector<std::string>& RNTupleDS::GetColumnNames() const
43{
44 return fColumnNames;
45}
46
47
49{
50 const auto index = std::distance(
51 fColumnNames.begin(), std::find(fColumnNames.begin(), fColumnNames.end(), name));
52 // TODO(jblomer): check expected type info like in, e.g., RRootDS.cxx
53 // There is a problem extracting the type info for std::int32_t and company though
54
55 std::vector<void*> ptrs;
56 for (unsigned i = 0; i < fNSlots; ++i)
57 ptrs.push_back(&fValuePtrs[i][index]);
58
59 return ptrs;
60}
61
62bool RNTupleDS::SetEntry(unsigned int slot, ULong64_t entryIndex)
63{
64 fReaders[slot]->LoadEntry(entryIndex, fEntries[slot].get());
65 return true;
66}
67
68std::vector<std::pair<ULong64_t, ULong64_t>> RNTupleDS::GetEntryRanges()
69{
70 // TODO(jblomer): use cluster boundaries for the entry ranges
71 std::vector<std::pair<ULong64_t, ULong64_t>> ranges;
72 if (fHasSeenAllRanges) return ranges;
73
74 auto nEntries = fReaders[0]->GetNEntries();
75 const auto chunkSize = nEntries / fNSlots;
76 const auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
77 auto start = 0UL;
78 auto end = 0UL;
79 for (auto i : ROOT::TSeqU(fNSlots)) {
80 start = end;
81 end += chunkSize;
82 ranges.emplace_back(start, end);
83 (void)i;
84 }
85 ranges.back().second += reminder;
86 fHasSeenAllRanges = true;
87 return ranges;
88}
89
90
91std::string RNTupleDS::GetTypeName(std::string_view colName) const
92{
93 const auto index = std::distance(
94 fColumnNames.begin(), std::find(fColumnNames.begin(), fColumnNames.end(), colName));
95 return fColumnTypes[index];
96}
97
98
100{
101 return std::find(fColumnNames.begin(), fColumnNames.end(), colName) !=
102 fColumnNames.end();
103}
104
105
107{
108 fHasSeenAllRanges = false;
109}
110
111
112void RNTupleDS::SetNSlots(unsigned int nSlots)
113{
114 R__ASSERT(fNSlots == 0);
115 R__ASSERT(nSlots > 0);
116 fNSlots = nSlots;
117
118 for (unsigned int i = 1; i < fNSlots; ++i) {
119 fReaders.emplace_back(fReaders[0]->Clone());
120 }
121
122 for (unsigned int i = 0; i < fNSlots; ++i) {
123 auto entry = fReaders[i]->GetModel()->CreateEntry();
124 fValuePtrs.emplace_back(std::vector<void*>());
125 for (unsigned j = 0; j < fColumnNames.size(); ++j) {
126 fValuePtrs[i].emplace_back(entry->GetValue(fColumnNames[j]).GetRawPtr());
127 }
128 fEntries.emplace_back(std::move(entry));
129 }
130}
131
132
134{
135 auto ntuple = RNTupleReader::Open(ntupleName, fileName);
136 ROOT::RDataFrame rdf(std::make_unique<RNTupleDS>(std::move(ntuple)));
137 return rdf;
138}
139
140} // ns Experimental
141} // ns ROOT
#define f(i)
Definition: RSha256.hxx:104
unsigned long long ULong64_t
Definition: RtypesCore.h:70
#define R__ASSERT(e)
Definition: TError.h:96
char name[80]
Definition: TGX11.cxx:109
typedef void((*Func_t)())
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
Definition: RNTupleDS.cxx:42
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
Definition: RNTupleDS.cxx:112
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition: RNTupleDS.cxx:62
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
Definition: RNTupleDS.cxx:48
std::vector< std::unique_ptr< ROOT::Experimental::RNTupleReader > > fReaders
Clones of the first reader, one for each slot.
Definition: RNTupleDS.hxx:37
std::vector< std::string > fColumnNames
Definition: RNTupleDS.hxx:43
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
Definition: RNTupleDS.cxx:91
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
Definition: RNTupleDS.cxx:68
RNTupleDS(std::unique_ptr< ROOT::Experimental::RNTupleReader > ntuple)
Definition: RNTupleDS.cxx:30
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
Definition: RNTupleDS.cxx:99
std::vector< std::string > fColumnTypes
Definition: RNTupleDS.hxx:44
std::vector< std::unique_ptr< ROOT::Experimental::REntry > > fEntries
Definition: RNTupleDS.hxx:38
void Initialise() final
Convenience method called before starting an event-loop.
Definition: RNTupleDS.cxx:106
std::vector< std::vector< void * > > fValuePtrs
The raw pointers wrapped by the RValue items of fEntries.
Definition: RNTupleDS.hxx:40
static std::unique_ptr< RNTupleReader > Open(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage)
Definition: RNTuple.cxx:89
std::vector< void * > Record_t
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
basic_string_view< char > string_view
RDataFrame MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName)
Definition: RNTupleDS.cxx:133
VSD Structures.
Definition: StringConv.hxx:21