Logo ROOT  
Reference Guide
No Matches
Go to the documentation of this file.
1/// \file RNTupleDS.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Enrico Guiraud <enrico.guiraud@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
10 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
17#ifndef ROOT_RNTupleDS
18#define ROOT_RNTupleDS
20#include <ROOT/RDataFrame.hxx>
21#include <ROOT/RDataSource.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RStringView.hxx>
25#include <cstdint>
26#include <memory>
27#include <string>
28#include <vector>
30namespace ROOT {
31namespace Experimental {
// Forward declarations: these types are only named in this header and are not defined in the visible part of it.
33class RNTuple;
34class RNTupleDescriptor;
// Detail types; of these, only RPageSource is referenced by the declarations visible below.
36namespace Detail {
37class RFieldBase;
38class RFieldValue;
39class RPageSource;
40} // namespace Detail
42namespace Internal {
// Column reader type; RNTupleDS keeps one prototype of it per column in fColumnReaderPrototypes.
43class RNTupleColumnReader;
/// The RDataSource implementation for RNTuple.
// NOTE(review): this listing does not show the access specifiers or the closing brace of the class;
// confirm the public/protected split against the real header.
46class RNTupleDS final : public ROOT::RDF::RDataSource {
47 /// Clones of the first source, one for each slot
48 std::vector<std::unique_ptr<ROOT::Experimental::Detail::RPageSource>> fSources;
50 /// We prepare a column reader prototype for every column. If a column reader is actually requested
51 /// in GetColumnReaders(), we move a clone of the prototype into the hands of RDataFrame.
52 /// Only the clone connects to the backing page store and acquires I/O resources.
53 std::vector<std::unique_ptr<ROOT::Experimental::Internal::RNTupleColumnReader>> fColumnReaderPrototypes;
/// Column names of the dataset; GetColumnNames() returns a reference to this vector.
54 std::vector<std::string> fColumnNames;
/// Presumably the type name of each column, parallel to fColumnNames -- TODO confirm against GetTypeName().
55 std::vector<std::string> fColumnTypes;
/// NOTE(review): presumably indices of the columns for which a reader was requested -- confirm in the implementation.
56 std::vector<size_t> fActiveColumns;
/// Number of processing slots; starts at 0 (presumably updated by SetNSlots() -- confirm).
58 unsigned fNSlots = 0;
/// Starts out false; presumably set once GetEntryRanges() has handed out all ranges -- confirm.
59 bool fHasSeenAllRanges = false;
61 /// Provides the RDF column "colName" given the field identified by fieldId. For records and collections,
62 /// AddField recurses into the sub fields. skeinIDs is the list of field IDs of the outer collections
63 /// of fieldId. For instance, if fieldId refers to an `std::vector<Jet>`, with
64 /// struct Jet {
65 /// float pt;
66 /// float eta;
67 /// };
68 /// AddField will recurse into Jet.pt and Jet.eta and provide the two inner fields as std::vector<float> each.
69 void AddField(const RNTupleDescriptor &desc,
70 std::string_view colName,
71 DescriptorId_t fieldId,
72 std::vector<DescriptorId_t> skeinIDs);
/// Constructs the data source from a page source; the unique_ptr is passed by value, so ownership moves in.
75 explicit RNTupleDS(std::unique_ptr<ROOT::Experimental::Detail::RPageSource> pageSource);
/// Inform RDataSource of the number of processing slots.
77 void SetNSlots(unsigned int nSlots) final;
/// Returns a reference to the collection of the dataset's column names.
78 const std::vector<std::string> &GetColumnNames() const final { return fColumnNames; }
/// Checks if the dataset has a certain column.
79 bool HasColumn(std::string_view colName) const final;
/// Type of a column as a string.
80 std::string GetTypeName(std::string_view colName) const final;
/// Return ranges of entries to distribute to tasks.
81 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
/// Return a string representation of the datasource type (always "RNTupleDS").
82 std::string GetLabel() final { return "RNTupleDS"; }
/// Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
84 bool SetEntry(unsigned int slot, ULong64_t entry) final;
/// Convenience method called before starting an event-loop.
86 void Initialize() final;
/// Convenience method called after concluding an event-loop.
87 void Finalize() final;
/// If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
/// The slot and name parameters are unnamed in this declaration.
89 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
90 GetColumnReaders(unsigned int /*slot*/, std::string_view /*name*/, const std::type_info &) final;
/// Returns a type-erased vector of pointers to pointers to column values - one per slot.
93 Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final;
96} // ns Experimental
98namespace RDF {
99namespace Experimental {
/// Returns an RDataFrame. Judging by the parameter names, it reads the ntuple `ntupleName`
/// from the file `fileName` -- confirm in the implementation, which is not visible here.
100RDataFrame FromRNTuple(std::string_view ntupleName, std::string_view fileName);
/// Overload that takes a pointer to an RNTuple object instead of a name/file pair.
/// NOTE(review): ownership and lifetime requirements of `ntuple` are not visible here -- confirm.
101RDataFrame FromRNTuple(ROOT::Experimental::RNTuple *ntuple);
102} // namespace Experimental
103} // namespace RDF
105} // ns ROOT
unsigned long long ULong64_t
Definition RtypesCore.h:81
char name[80]
Definition TGX11.cxx:110
The RDataSource implementation for RNTuple.
Definition RNTupleDS.hxx:46
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
void AddField(const RNTupleDescriptor &desc, std::string_view colName, DescriptorId_t fieldId, std::vector< DescriptorId_t > skeinIDs)
Provides the RDF column "colName" given the field identified by fieldID.
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
Definition RNTupleDS.hxx:78
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::vector< std::unique_ptr< ROOT::Experimental::Internal::RNTupleColumnReader > > fColumnReaderPrototypes
We prepare a column reader prototype for every column.
Definition RNTupleDS.hxx:53
std::vector< std::unique_ptr< ROOT::Experimental::Detail::RPageSource > > fSources
Clones of the first source, one for each slot.
Definition RNTupleDS.hxx:48
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
void Initialize() final
Convenience method called before starting an event-loop.
std::vector< std::string > fColumnNames
Definition RNTupleDS.hxx:54
void Finalize() final
Convenience method called after concluding an event-loop.
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g. GetTypeName("x") == "double".
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::vector< size_t > fActiveColumns
Definition RNTupleDS.hxx:56
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
std::vector< std::string > fColumnTypes
Definition RNTupleDS.hxx:55
std::string GetLabel() final
Return a string representation of the datasource type.
Definition RNTupleDS.hxx:82
The on-storage meta-data of an ntuple.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:512
Pure virtual base class for all column reader types.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.