Logo ROOT   6.18/05
Reference Guide
RColumnValue.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 09/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RCOLUMNVALUE
12#define ROOT_RCOLUMNVALUE
13
15#include <ROOT/RDF/Utils.hxx> // IsRVec_t, TypeID2TypeName
17#include <ROOT/RMakeUnique.hxx>
18#include <ROOT/RVec.hxx>
19#include <ROOT/TypeTraits.hxx> // TakeFirstParameter_t
20#include <RtypesCore.h>
21#include <TTreeReader.h>
22#include <TTreeReaderValue.h>
23#include <TTreeReaderArray.h>
24
25#include <cstring> // strcmp
26#include <initializer_list>
27#include <limits>
28#include <memory>
29#include <stdexcept>
30#include <string>
31#include <tuple>
32#include <type_traits>
33#include <vector>
34
35namespace ROOT {
36namespace Internal {
37namespace RDF {
38using namespace ROOT::VecOps;
39
40/**
41\class ROOT::Internal::RDF::RColumnValue
42\ingroup dataframe
43\brief Helper class that updates and returns TTree branches as well as RDataFrame temporary columns
44\tparam T The type of the column
45
46RDataFrame nodes must access two different types of values during the event loop:
47values of real branches, for which TTreeReader{Values,Arrays} act as proxies, or
48temporary columns whose values are generated on the fly. While the type of the
49value is known at compile time (or just-in-time), it is only at runtime that nodes
50can check whether a certain value is generated on the fly or not.
51
52RColumnValue abstracts this difference by providing the same interface for
53both cases and handling the reading or generation of new values transparently.
54Only one of the data members fTreeReader, fCustomValuePtr or fDSValuePtr is in use
55for a given RColumnValue, depending on whether the value comes from a real
56TTree branch, a temporary (custom) column or a data-source column respectively.
57
58RDataFrame nodes can store tuples of RColumnValues and retrieve an updated
59value for the column via the `Get` method.
60**/
61template <typename T>
62class R__CLING_PTRCHECK(off) RColumnValue {
63// R__CLING_PTRCHECK is disabled because all pointers are hand-crafted by RDF.
64
65 using MustUseRVec_t = IsRVec_t<T>;
66
67 // ColumnValue_t is the type of the column or the type of the elements of an array column
68 using ColumnValue_t = typename std::conditional<MustUseRVec_t::value, TakeFirstParameter_t<T>, T>::type;
69 using TreeReader_t = typename std::conditional<MustUseRVec_t::value, TTreeReaderArray<ColumnValue_t>,
71
72 /// RColumnValue has a slightly different behaviour whether the column comes from a TTreeReader, a RDataFrame Define
73 /// or a RDataSource. It stores which it is as an enum.
74 enum class EColumnKind { kTree, kCustomColumn, kDataSource, kInvalid };
75 // Set to the correct value by MakeProxy or SetTmpColumn
77 /// The slot this value belongs to. Only needed when querying custom column values, it is set in `SetTmpColumn`.
78 unsigned int fSlot = std::numeric_limits<unsigned int>::max();
79
80 // Each element of the following stacks will be in use by a _single task_.
81 // Each task will push one element when it starts and pop it when it ends.
82 // Stacks will typically be very small (1-2 elements typically) and will only grow over size 1 in case of interleaved
83 // task execution i.e. when more than one task needs readers in this worker thread.
84
85 /// Owning ptrs to a TTreeReaderValue or TTreeReaderArray. Only used for Tree columns.
86 std::unique_ptr<TreeReader_t> fTreeReader;
87 /// Non-owning ptrs to the value of a custom column.
89 /// Non-owning ptrs to the value of a data-source column.
91 /// Non-owning ptrs to the node responsible for the custom column. Needed when querying custom values.
93 /// Enumerator for the different properties of the branch storage in memory
94 enum class EStorageType : char { kContiguous, kUnknown, kSparse };
95 /// Signal whether we ever checked that the branch we are reading with a TTreeReaderArray stores array elements
96 /// in contiguous memory. Only used when T == RVec<U>.
98 /// If MustUseRVec, i.e. we are reading an array, we return a reference to this RVec to clients
100 bool fCopyWarningPrinted = false;
101
102public:
104
105 void SetTmpColumn(unsigned int slot, RCustomColumnBase *customColumn)
106 {
107 fCustomColumn = customColumn;
108 // Here we compare names and not typeinfos since they may come from two different contexts: a compiled
109 // and a jitted one.
110 const auto diffTypes = (0 != strcmp(customColumn->GetTypeId().name(), typeid(T).name()));
111 auto inheritedType = [&](){
112 auto colTClass = TClass::GetClass(customColumn->GetTypeId());
113 return colTClass && colTClass->InheritsFrom(TClass::GetClass<T>());
114 };
115
116 if (diffTypes && !inheritedType()) {
117 const auto tName = TypeID2TypeName(typeid(T));
118 const auto colTypeName = TypeID2TypeName(customColumn->GetTypeId());
119 std::string errMsg = "RColumnValue: type specified for column \"" +
120 customColumn->GetName() + "\" is ";
121 if (tName.empty()) {
122 errMsg += typeid(T).name();
123 errMsg += " (extracted from type info)";
124 } else {
125 errMsg += tName;
126 }
127 errMsg += " but temporary column has type ";
128 if (colTypeName.empty()) {
129 auto &id = customColumn->GetTypeId();
130 errMsg += id.name();
131 errMsg += " (extracted from type info)";
132 } else {
133 errMsg += colTypeName;
134 }
135 throw std::runtime_error(errMsg);
136 }
137
138 if (customColumn->IsDataSourceColumn()) {
139 fColumnKind = EColumnKind::kDataSource;
140 fDSValuePtr = static_cast<T **>(customColumn->GetValuePtr(slot));
141 } else {
142 fColumnKind = EColumnKind::kCustomColumn;
143 fCustomValuePtr = static_cast<T *>(customColumn->GetValuePtr(slot));
144 }
145 fSlot = slot;
146 }
147
148 void MakeProxy(TTreeReader *r, const std::string &bn)
149 {
150 fColumnKind = EColumnKind::kTree;
151 fTreeReader = std::make_unique<TreeReader_t>(*r, bn.c_str());
152 }
153
154 /// This overload is used to return scalar quantities (i.e. types that are not read into a RVec)
155 // This method is executed inside the event-loop, many times per entry
156 // If need be, the if statement can be avoided using thunks
157 // (have both branches inside functions and have a pointer to the branch to be executed)
158 template <typename U = T, typename std::enable_if<!RColumnValue<U>::MustUseRVec_t::value, int>::type = 0>
159 T &Get(Long64_t entry)
160 {
161 if (fColumnKind == EColumnKind::kTree) {
162 return *(fTreeReader->Get());
163 } else {
164 fCustomColumn->Update(fSlot, entry);
165 return fColumnKind == EColumnKind::kCustomColumn ? *fCustomValuePtr : **fDSValuePtr;
166 }
167 }
168
169 /// This overload is used to return arrays (i.e. types that are read into a RVec).
170 /// In this case the returned T is always a RVec<ColumnValue_t>.
171 /// RVec<bool> is treated differently, in a separate overload.
172 template <typename U = T,
173 typename std::enable_if<RColumnValue<U>::MustUseRVec_t::value && !std::is_same<U, RVec<bool>>::value,
174 int>::type = 0>
175 T &Get(Long64_t entry)
176 {
177 if (fColumnKind == EColumnKind::kTree) {
178 auto &readerArray = *fTreeReader;
179 // We only use TTreeReaderArrays to read columns that users flagged as type `RVec`, so we need to check
180 // that the branch stores the array as contiguous memory that we can actually wrap in an `RVec`.
181 // Currently we need the first entry to have been loaded to perform the check
182 // TODO Move check to `MakeProxy` once Axel implements this kind of check in TTreeReaderArray using
183 // TBranchProxy
184
185 if (EStorageType::kUnknown == fStorageType && readerArray.GetSize() > 1) {
186 // We can decide since the array is long enough
187 fStorageType =
188 (1 == (&readerArray[1] - &readerArray[0])) ? EStorageType::kContiguous : EStorageType::kSparse;
189 }
190
191 const auto readerArraySize = readerArray.GetSize();
192 if (EStorageType::kContiguous == fStorageType ||
193 (EStorageType::kUnknown == fStorageType && readerArray.GetSize() < 2)) {
194 if (readerArraySize > 0) {
195 // trigger loading of the contents of the TTreeReaderArray
196 // the address of the first element in the reader array is not necessarily equal to
197 // the address returned by the GetAddress method
198 auto readerArrayAddr = &readerArray.At(0);
199 T rvec(readerArrayAddr, readerArraySize);
200 std::swap(fRVec, rvec);
201 } else {
202 T emptyVec{};
203 std::swap(fRVec, emptyVec);
204 }
205 } else {
206 // The storage is not contiguous or we don't know yet: we cannot but copy into the rvec
207#ifndef NDEBUG
208 if (!fCopyWarningPrinted) {
209 Warning("RColumnValue::Get",
210 "Branch %s hangs from a non-split branch. A copy is being performed in order "
211 "to properly read the content.",
212 readerArray.GetBranchName());
213 fCopyWarningPrinted = true;
214 }
215#else
216 (void)fCopyWarningPrinted;
217#endif
218 if (readerArraySize > 0) {
219 T rvec(readerArray.begin(), readerArray.end());
220 std::swap(fRVec, rvec);
221 } else {
222 T emptyVec{};
223 std::swap(fRVec, emptyVec);
224 }
225 }
226 return fRVec;
227
228 } else {
229 fCustomColumn->Update(fSlot, entry);
230 return fColumnKind == EColumnKind::kCustomColumn ? *fCustomValuePtr : **fDSValuePtr;
231 }
232 }
233
234 /// This overload covers the RVec<bool> case. In this case we always copy the contents of TTreeReaderArray<bool>
235 /// into RVec<bool> (never take a view into the memory buffer) because the underlying memory buffer might be the
236 /// one of a std::vector<bool>, which is not a contiguous slab of bool values.
237 /// Note that this also penalizes the case in which the column type is actually bool[], but the possible performance
238 /// gains in this edge case is probably not worth the extra complication required to differentiate the two cases.
239 template <typename U = T,
240 typename std::enable_if<RColumnValue<U>::MustUseRVec_t::value && std::is_same<U, RVec<bool>>::value,
241 int>::type = 0>
242 T &Get(Long64_t entry)
243 {
244 if (fColumnKind == EColumnKind::kTree) {
245 auto &readerArray = *fTreeReader;
246 const auto readerArraySize = readerArray.GetSize();
247 if (readerArraySize > 0) {
248 // always perform a copy
249 T rvec(readerArray.begin(), readerArray.end());
250 std::swap(fRVec, rvec);
251 } else {
252 T emptyVec{};
253 std::swap(fRVec, emptyVec);
254 }
255 return fRVec;
256 } else {
257 // business as usual
258 fCustomColumn->Update(fSlot, entry);
259 return fColumnKind == EColumnKind::kCustomColumn ? *fCustomValuePtr : **fDSValuePtr;
260 }
261 }
262
263 void Reset()
264 {
265 // This method should by all means not be removed, together with all
266 // of its callers, otherwise a race condition takes place in which a
267 // TTreeReader and its TTreeReader{Value,Array}s could be deleted
268 // concurrently:
269 // - Thread #1) a task ends and pushes back processing slot
270 // - Thread #2) a task starts and overwrites thread-local TTreeReaderValues
271 // - Thread #1) first task deletes TTreeReader
272 // See https://github.com/root-project/root/commit/26e8ace6e47de6794ac9ec770c3bbff9b7f2e945
273 if (EColumnKind::kTree == fColumnKind) {
274 fTreeReader.reset();
275 }
276 }
277};
278
279// Some extern instantiations to speed-up compilation/interpretation time
280// These are not active if c++17 is enabled because of a bug in our clang
281// See ROOT-9499.
282#if __cplusplus < 201703L
283extern template class RColumnValue<int>;
284extern template class RColumnValue<unsigned int>;
285extern template class RColumnValue<char>;
286extern template class RColumnValue<unsigned char>;
287extern template class RColumnValue<float>;
288extern template class RColumnValue<double>;
289extern template class RColumnValue<Long64_t>;
290extern template class RColumnValue<ULong64_t>;
291extern template class RColumnValue<std::vector<int>>;
292extern template class RColumnValue<std::vector<unsigned int>>;
293extern template class RColumnValue<std::vector<char>>;
294extern template class RColumnValue<std::vector<unsigned char>>;
295extern template class RColumnValue<std::vector<float>>;
296extern template class RColumnValue<std::vector<double>>;
297extern template class RColumnValue<std::vector<Long64_t>>;
298extern template class RColumnValue<std::vector<ULong64_t>>;
299#endif
300
301template <typename T>
303};
304
305template <typename... BranchTypes>
306struct TRDFValueTuple<TypeList<BranchTypes...>> {
307 using type = std::tuple<RColumnValue<BranchTypes>...>;
308};
309
310template <typename BranchType>
312
/// Clear the proxies of a tuple of RColumnValues.
///
/// Calls `Reset()` on every element of `values` selected by the index pack `S`, in the order the
/// indices are listed.
/// \param[in,out] values Tuple-like object; `std::get<S>(values)` must offer a `Reset()` method.
/// \param[in] unnamed index_sequence Only its type matters: it carries the indices of the elements to reset.
template <typename ValueTuple, std::size_t... S>
void ResetRDFValueTuple(ValueTuple &values, std::index_sequence<S...>)
{
   // hack to expand a parameter pack without c++17 fold expressions.
   // The result of Reset() is cast to void so that the built-in comma operator is always the one in use,
   // whatever Reset() returns.
   std::initializer_list<int> expander{(static_cast<void>(std::get<S>(values).Reset()), 0)...};
   (void)expander; // avoid "unused variable" warnings
}
321
322
323} // ns RDF
324} // ns Internal
325} // ns ROOT
326
327#endif // ROOT_RCOLUMNVALUE
ROOT::R::TRInterface & r
Definition: Object.C:4
long long Long64_t
Definition: RtypesCore.h:69
void Warning(const char *location, const char *msgfmt,...)
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
@ kUnknown
Definition: TStructNode.h:19
typedef void((*Func_t)())
@ kInvalid
Definition: TSystem.h:80
virtual void Update(unsigned int slot, Long64_t entry)=0
virtual void * GetValuePtr(unsigned int slot)=0
virtual const std::type_info & GetTypeId() const =0
Helper class that updates and returns TTree branches as well as RDataFrame temporary columns.
T * fCustomValuePtr
Non-owning ptrs to the value of a custom column.
T ** fDSValuePtr
Non-owning ptrs to the value of a data-source column.
void MakeProxy(TTreeReader *r, const std::string &bn)
std::unique_ptr< TreeReader_t > fTreeReader
Owning ptrs to a TTreeReaderValue or TTreeReaderArray. Only used for Tree columns.
typename std::conditional< MustUseRVec_t::value, TakeFirstParameter_t< T >, T >::type ColumnValue_t
EColumnKind
RColumnValue has a slightly different behaviour whether the column comes from a TTreeReader,...
T & Get(Long64_t entry)
This overload is used to return scalar quantities (i.e. types that are not read into a RVec)
typename std::conditional< MustUseRVec_t::value, TTreeReaderArray< ColumnValue_t >, TTreeReaderValue< ColumnValue_t > >::type TreeReader_t
EStorageType
Enumerator for the different properties of the branch storage in memory.
void SetTmpColumn(unsigned int slot, RCustomColumnBase *customColumn)
RCustomColumnBase * fCustomColumn
Non-owning ptrs to the node responsible for the custom column. Needed when querying custom values.
RVec< ColumnValue_t > fRVec
If MustUseRVec, i.e. we are reading an array, we return a reference to this RVec to clients.
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2895
An interface for reading values stored in ROOT columnar datasets.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:44
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:83
void ResetRDFValueTuple(std::vector< RTypeErasedColumnValue > &values, std::index_sequence< S... >, ROOT::TypeTraits::TypeList< ColTypes... >)
This overload is specialized to act on RTypeErasedColumnValues instead of RColumnValues.
Definition: RAction.hxx:86
typename TRDFValueTuple< BranchType >::type RDFValueTuple_t
double T(double x)
Definition: ChebyshevPol.h:34
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
RooArgSet S(const RooAbsArg &v1)
std::tuple< RColumnValue< BranchTypes >... > type
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27