Logo ROOT  
Reference Guide
RNTupleDS.hxx
Go to the documentation of this file.
1/// \file RNTupleDS.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Enrico Guiraud <enrico.guiraud@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT_RNTupleDS
18#define ROOT_RNTupleDS
19
20#include <ROOT/RDataFrame.hxx>
21#include <ROOT/RDataSource.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RStringView.hxx>
24
25#include <cstdint>
26#include <memory>
27#include <string>
28#include <vector>
29
30namespace ROOT {
31namespace Experimental {
32
33class RNTupleDescriptor;
34
35namespace Detail {
36class RFieldBase;
37class RFieldValue;
38class RPageSource;
39} // namespace Detail
40
41namespace Internal {
42class RNTupleColumnReader;
43}
44
45class RNTupleDS final : public ROOT::RDF::RDataSource { // Data source derived from ROOT::RDF::RDataSource; GetLabel() reports it as "RNTupleDS"
46 /// Clones of the first source, one for each slot
47 std::vector<std::unique_ptr<ROOT::Experimental::Detail::RPageSource>> fSources;
48
49 /// We prepare a column reader prototype for every column. If a column reader is actually requested
50 /// in GetColumnReaders(), we move a clone of the prototype into the hands of RDataFrame.
51 /// Only the clone connects to the backing page store and acquires I/O resources.
52 std::vector<std::unique_ptr<ROOT::Experimental::Internal::RNTupleColumnReader>> fColumnReaderPrototypes;
53 std::vector<std::string> fColumnNames; ///< Column names; handed out by const reference from GetColumnNames()
54 std::vector<std::string> fColumnTypes; ///< Presumably the type name of each column, index-aligned with fColumnNames -- TODO confirm in RNTupleDS.cxx
55 std::vector<size_t> fActiveColumns; ///< NOTE(review): looks like indices of the columns currently in use -- confirm in RNTupleDS.cxx
56
57 unsigned fNSlots = 0; ///< Starts at 0; presumably updated by SetNSlots() -- confirm in RNTupleDS.cxx
58 bool fHasSeenAllRanges = false; ///< Starts false; presumably tracks whether GetEntryRanges() has handed out every range -- confirm
59
60 /// Provides the RDF column "colName" given the field identified by fieldId. For records and collections,
61 /// AddField recurses into the subfields. skeinIDs is the list of field IDs of the outer collections
62 /// of fieldId. For instance, if fieldId refers to an `std::vector<Jet>`, with
63 /// struct Jet {
64 /// float pt;
65 /// float eta;
66 /// };
67 /// AddField will recurse into Jet.pt and Jet.eta and provide the two inner fields as std::vector<float> each.
68 void AddField(const RNTupleDescriptor &desc,
69 std::string_view colName,
70 DescriptorId_t fieldId,
71 std::vector<DescriptorId_t> skeinIDs);
72
73public:
74 explicit RNTupleDS(std::unique_ptr<ROOT::Experimental::Detail::RPageSource> pageSource); // Takes ownership of the page source passed in
76 void SetNSlots(unsigned int nSlots) final; // Informs the data source of the number of processing slots
77 const std::vector<std::string> &GetColumnNames() const final { return fColumnNames; } // Returns a reference to the stored column names
78 bool HasColumn(std::string_view colName) const final; // Checks if the dataset has a column named colName
79 std::string GetTypeName(std::string_view colName) const final; // Type of the column colName, as a string
80 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final; // Returns ranges of entries to distribute to tasks
81 std::string GetLabel() final { return "RNTupleDS"; } // String representation of the data source type
82
83 bool SetEntry(unsigned int slot, ULong64_t entry) final; // Advances the readers returned by GetColumnReaders to `entry` for the given slot
84
85 void Initialise() final; // Called before starting an event loop
86 void Finalise() final; // Called after concluding an event loop
87
88 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
89 GetColumnReaders(unsigned int /*slot*/, std::string_view /*name*/, const std::type_info &) final; // Called when the other GetColumnReaders overload returns an empty vector
90
91protected:
92 Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final; // Returns a type-erased vector of pointers to pointers to column values, one per slot
93};
94
95RDataFrame MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName); // Declaration only; returns an RDataFrame, presumably for the ntuple `ntupleName` stored in `fileName` -- confirm in RNTupleDS.cxx
96
97} // ns Experimental
98} // ns ROOT
99
100#endif
unsigned long long ULong64_t
Definition: RtypesCore.h:81
char name[80]
Definition: TGX11.cxx:110
A field translates read and write calls from/to underlying columns to/from tree values.
Definition: RField.hxx:58
Represents transient storage of simple or complex C++ values.
Definition: RFieldValue.hxx:28
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
Definition: RNTupleDS.cxx:260
void AddField(const RNTupleDescriptor &desc, std::string_view colName, DescriptorId_t fieldId, std::vector< DescriptorId_t > skeinIDs)
Provides the RDF column "colName" given the field identified by fieldID.
Definition: RNTupleDS.cxx:146
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
Definition: RNTupleDS.hxx:77
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
Definition: RNTupleDS.cxx:316
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition: RNTupleDS.cxx:270
std::vector< std::unique_ptr< ROOT::Experimental::Internal::RNTupleColumnReader > > fColumnReaderPrototypes
We prepare a column reader prototype for every column.
Definition: RNTupleDS.hxx:52
std::vector< std::unique_ptr< ROOT::Experimental::Detail::RPageSource > > fSources
Clones of the first source, one for each slot.
Definition: RNTupleDS.hxx:47
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
Definition: RNTupleDS.cxx:253
std::vector< std::string > fColumnNames
Definition: RNTupleDS.hxx:53
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
Definition: RNTupleDS.cxx:298
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
Definition: RNTupleDS.cxx:275
RNTupleDS(std::unique_ptr< ROOT::Experimental::Detail::RPageSource > pageSource)
Definition: RNTupleDS.cxx:244
std::vector< size_t > fActiveColumns
Definition: RNTupleDS.hxx:55
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
Definition: RNTupleDS.cxx:304
void Finalise() final
Convenience method called after concluding an event-loop.
Definition: RNTupleDS.cxx:314
std::vector< std::string > fColumnTypes
Definition: RNTupleDS.hxx:54
std::string GetLabel() final
Return a string representation of the datasource type.
Definition: RNTupleDS.hxx:81
void Initialise() final
Convenience method called before starting an event-loop.
Definition: RNTupleDS.cxx:309
The on-storage meta-data of an ntuple.
Pure virtual base class for all column reader types.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTree,...
Definition: RDataFrame.hxx:40
basic_string_view< char > string_view
RDataFrame MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName)
Definition: RNTupleDS.cxx:331
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...