Logo ROOT  
Reference Guide
RTreeColumnReader.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 09/2020
2
3/*************************************************************************
4 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RTREECOLUMNREADER
12#define ROOT_RDF_RTREECOLUMNREADER
13
14#include "RColumnReaderBase.hxx"
15#include <ROOT/RVec.hxx>
16#include <Rtypes.h> // Long64_t, R__CLING_PTRCHECK
17#include <TTreeReader.h>
18#include <TTreeReaderValue.h>
19#include <TTreeReaderArray.h>
20
21#include <memory>
22#include <string>
23
24namespace ROOT {
25namespace Internal {
26namespace RDF {
27
28/// RTreeColumnReader specialization for TTree values read via TTreeReaderValues
29template <typename T>
30class R__CLING_PTRCHECK(off) RTreeColumnReader final : public ROOT::Detail::RDF::RColumnReaderBase {
31 std::unique_ptr<TTreeReaderValue<T>> fTreeValue;
32
33 void *GetImpl(Long64_t) final { return fTreeValue->Get(); }
34public:
35 /// Construct the RTreeColumnReader. Actual initialization is performed lazily by the Init method.
36 RTreeColumnReader(TTreeReader &r, const std::string &colName)
37 : fTreeValue(std::make_unique<TTreeReaderValue<T>>(r, colName.c_str()))
38 {
39 }
40
41 /// The dtor resets the TTreeReaderValue object.
42 //
43 // Otherwise a race condition is present in which a TTreeReader
44 // and its TTreeReader{Value,Array}s can be deleted concurrently:
45 // - Thread #1) a task ends and pushes back processing slot
46 // - Thread #2) a task starts and overwrites thread-local TTreeReaderValues
47 // - Thread #1) first task deletes TTreeReader
48 // See https://github.com/root-project/root/commit/26e8ace6e47de6794ac9ec770c3bbff9b7f2e945
49 ~RTreeColumnReader() { fTreeValue.reset(); }
50};
51
52/// RTreeColumnReader specialization for TTree values read via TTreeReaderArrays.
53///
54/// TTreeReaderArrays are used whenever the RDF column type is RVec<T>.
55template <typename T>
56class R__CLING_PTRCHECK(off) RTreeColumnReader<RVec<T>> final : public ROOT::Detail::RDF::RColumnReaderBase {
57 std::unique_ptr<TTreeReaderArray<T>> fTreeArray;
58
59 /// Enumerator for the memory layout of the branch
60 enum class EStorageType : char { kContiguous, kUnknown, kSparse };
61
62 /// We return a reference to this RVec to clients, to guarantee a stable address and contiguous memory layout.
64
65 /// Signal whether we ever checked that the branch we are reading with a TTreeReaderArray stores array elements
66 /// in contiguous memory.
68 Long64_t fLastEntry = -1;
69
70 /// Whether we already printed a warning about performing a copy of the TTreeReaderArray contents
71 bool fCopyWarningPrinted = false;
72
73 void *GetImpl(Long64_t entry) final
74 {
75 if (entry == fLastEntry)
76 return &fRVec; // we already pointed our fRVec to the right address
77
78 auto &readerArray = *fTreeArray;
79 // We only use TTreeReaderArrays to read columns that users flagged as type `RVec`, so we need to check
80 // that the branch stores the array as contiguous memory that we can actually wrap in an `RVec`.
81 // Currently we need the first entry to have been loaded to perform the check
82 // TODO Move check to constructor once ROOT-10823 is fixed and TTreeReaderArray itself exposes this information
83 const auto readerArraySize = readerArray.GetSize();
84 if (EStorageType::kUnknown == fStorageType && readerArraySize > 1) {
85 // We can decide since the array is long enough
86 fStorageType = EStorageType::kContiguous;
87 for (auto i = 0u; i < readerArraySize - 1; ++i) {
88 if ((char *)&readerArray[i + 1] - (char *)&readerArray[i] != sizeof(T)) {
89 fStorageType = EStorageType::kSparse;
90 break;
91 }
92 }
93 }
94
95 if (EStorageType::kContiguous == fStorageType ||
96 (EStorageType::kUnknown == fStorageType && readerArray.GetSize() < 2)) {
97 if (readerArraySize > 0) {
98 // trigger loading of the contents of the TTreeReaderArray
99 // the address of the first element in the reader array is not necessarily equal to
100 // the address returned by the GetAddress method
101 auto readerArrayAddr = &readerArray.At(0);
102 RVec<T> rvec(readerArrayAddr, readerArraySize);
103 swap(fRVec, rvec);
104 } else {
105 RVec<T> emptyVec{};
106 swap(fRVec, emptyVec);
107 }
108 } else {
109 // The storage is not contiguous or we don't know yet: we cannot but copy into the rvec
110#ifndef NDEBUG
111 if (!fCopyWarningPrinted) {
112 Warning("RTreeColumnReader::Get",
113 "Branch %s hangs from a non-split branch. A copy is being performed in order "
114 "to properly read the content.",
115 readerArray.GetBranchName());
116 fCopyWarningPrinted = true;
117 }
118#else
119 (void)fCopyWarningPrinted;
120#endif
121 if (readerArraySize > 0) {
122 RVec<T> rvec(readerArray.begin(), readerArray.end());
123 swap(fRVec, rvec);
124 } else {
125 RVec<T> emptyVec{};
126 swap(fRVec, emptyVec);
127 }
128 }
129 fLastEntry = entry;
130 return &fRVec;
131 }
132
133public:
134 RTreeColumnReader(TTreeReader &r, const std::string &colName)
135 : fTreeArray(std::make_unique<TTreeReaderArray<T>>(r, colName.c_str()))
136 {
137 }
138
139 /// See the other class template specializations for an explanation.
140 ~RTreeColumnReader() { fTreeArray.reset(); }
141};
142
143/// RTreeColumnReader specialization for arrays of boolean values read via TTreeReaderArrays.
144///
145/// TTreeReaderArray<bool> is used whenever the RDF column type is RVec<bool>.
146template <>
147class R__CLING_PTRCHECK(off) RTreeColumnReader<RVec<bool>> final : public ROOT::Detail::RDF::RColumnReaderBase {
148
149 std::unique_ptr<TTreeReaderArray<bool>> fTreeArray;
150
151 /// We return a reference to this RVec to clients, to guarantee a stable address and contiguous memory layout
153
154 // We always copy the contents of TTreeReaderArray<bool> into an RVec<bool> (never take a view into the memory
155 // buffer) because the underlying memory buffer might be the one of a std::vector<bool>, which is not a contiguous
156 // slab of bool values.
157 // Note that this also penalizes the case in which the column type is actually bool[], but the possible performance
158 // gains in this edge case is probably not worth the extra complication required to differentiate the two cases.
159 void *GetImpl(Long64_t) final
160 {
161 auto &readerArray = *fTreeArray;
162 const auto readerArraySize = readerArray.GetSize();
163 if (readerArraySize > 0) {
164 // always perform a copy
165 RVec<bool> rvec(readerArray.begin(), readerArray.end());
166 swap(fRVec, rvec);
167 } else {
168 RVec<bool> emptyVec{};
169 swap(fRVec, emptyVec);
170 }
171 return &fRVec;
172 }
173
174public:
175 RTreeColumnReader(TTreeReader &r, const std::string &colName)
176 : fTreeArray(std::make_unique<TTreeReaderArray<bool>>(r, colName.c_str()))
177 {
178 }
179
180 /// See the other class template specializations for an explanation.
181 ~RTreeColumnReader() { fTreeArray.reset(); }
182};
183
184} // namespace RDF
185} // namespace Internal
186} // namespace ROOT
187
188#endif
long long Long64_t
Definition: RtypesCore.h:80
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition: TError.cxx:231
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
@ kUnknown
Definition: TStructNode.h:19
std::unique_ptr< TTreeReaderArray< T > > fTreeArray
RTreeColumnReader(TTreeReader &r, const std::string &colName)
EStorageType
Enumerator for the memory layout of the branch.
~RTreeColumnReader()
See the other class template specializations for an explanation.
RVec< T > fRVec
We return a reference to this RVec to clients, to guarantee a stable address and contiguous memory layout.
RVec< bool > fRVec
We return a reference to this RVec to clients, to guarantee a stable address and contiguous memory layout.
~RTreeColumnReader()
See the other class template specializations for an explanation.
std::unique_ptr< TTreeReaderArray< bool > > fTreeArray
RTreeColumnReader(TTreeReader &r, const std::string &colName)
RTreeColumnReader specialization for TTree values read via TTreeReaderValues.
RTreeColumnReader(TTreeReader &r, const std::string &colName)
Construct the RTreeColumnReader. Actual initialization is performed lazily by the Init method.
~RTreeColumnReader()
The dtor resets the TTreeReaderValue object.
std::unique_ptr< TTreeReaderValue< T > > fTreeValue
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition: RVec.hxx:1439
An interface for reading collections stored in ROOT columnar datasets.
An interface for reading values stored in ROOT columnar datasets.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:44
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
void(off) SmallVectorTemplateBase< T
double T(double x)
Definition: ChebyshevPol.h:34
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.