Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RFilterWithMissingValues.hxx
Go to the documentation of this file.
1// Author: Vincenzo Eduardo Padulano CERN 09/2024
2
3/*************************************************************************
4 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RFilterWithMissingValues
12#define ROOT_RDF_RFilterWithMissingValues
13
17#include "ROOT/RDF/Utils.hxx"
21#include "ROOT/TypeTraits.hxx"
22#include "RtypesCore.h"
23
24#include <algorithm>
25#include <cassert>
26#include <memory>
27#include <string>
28#include <unordered_map>
29#include <utility> // std::index_sequence
30#include <vector>
31
32// fwd decls for RFilterWithMissingValues
34std::shared_ptr<GraphNode> CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr,
35 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap);
36
37std::shared_ptr<GraphNode> AddDefinesToGraph(std::shared_ptr<GraphNode> node,
39 const std::vector<std::string> &prevNodeDefines,
40 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap);
41} // namespace ROOT::Internal::RDF::GraphDrawing
42
43namespace ROOT::Detail::RDF {
44
46class RJittedFilter;
47
48/**
49 * \brief implementation of FilterAvailable and FilterMissing operations
50 *
51 * The filter evaluates if the entry is missing a value for the input column.
52 * Depending on which function was called by the user, the entry with the
53 * missing value:
54 * - will be discarded in case the user called FilterAvailable
55 * - will be kept in case the user called FilterMissing
56 */
57template <typename PrevNodeRaw>
59
60 // If the PrevNode is a RJittedFilter, treat it as a more generic RFilterBase: when dealing with systematic
61 // variations we'll have a RJittedFilter node for the nominal case but other "universes" will use concrete filters,
62 // so we normalize the "previous node type" to the base type RFilterBase.
63 using PrevNode_t = std::conditional_t<std::is_same<PrevNodeRaw, RJittedFilter>::value, RFilterBase, PrevNodeRaw>;
64 const std::shared_ptr<PrevNode_t> fPrevNodePtr;
65
66 // One column reader per slot
67 std::vector<RColumnReaderBase *> fValues;
68
69 // Whether an entry with a missing value for the input column is discarded (true, FilterAvailable) or kept (false, FilterMissing)
71
72public:
73 RFilterWithMissingValues(bool discardEntry, std::shared_ptr<PrevNode_t> pd,
74 const RDFInternal::RColumnRegister &colRegister, const ColumnNames_t &columns,
75 std::string_view filterName = "", const std::string &variationName = "nominal")
76 : RFilterBase(pd->GetLoopManagerUnchecked(), filterName, pd->GetLoopManagerUnchecked()->GetNSlots(), colRegister,
77 columns, pd->GetVariations(), variationName),
78 fPrevNodePtr(std::move(pd)),
79 fValues(fPrevNodePtr->GetLoopManagerUnchecked()->GetNSlots()),
80 fDiscardEntryWithMissingValue(discardEntry)
81 {
82 fLoopManager->Register(this);
83 // We suppress errors that TTreeReader prints regarding the missing branch
84 fLoopManager->InsertSuppressErrorsForMissingBranch(fColumnNames[0]);
85 }
86
92 {
93 // must Deregister objects from the RLoopManager here, before the fPrevNodePtr data member is destroyed:
94 // otherwise if fPrevNodePtr is the RLoopManager, it will be destroyed before the calls to Deregister happen.
95 fLoopManager->Deregister(this);
96 fLoopManager->EraseSuppressErrorsForMissingBranch(fColumnNames[0]);
97 }
98
99 bool CheckFilters(unsigned int slot, Long64_t entry) final
100 {
101 constexpr static auto cacheLineStepLong64_t = RDFInternal::CacheLineStep<Long64_t>();
102 constexpr static auto cacheLineStepint = RDFInternal::CacheLineStep<int>();
103 constexpr static auto cacheLineStepULong64_t = RDFInternal::CacheLineStep<ULong64_t>();
104
105 if (entry != fLastCheckedEntry[slot * cacheLineStepLong64_t]) {
106 if (!fPrevNodePtr->CheckFilters(slot, entry)) {
107 // a filter upstream returned false, cache the result
108 fLastResult[slot * cacheLineStepint] = false;
109 } else {
110 // evaluate this filter, cache the result
111 const bool valueIsMissing = fValues[slot]->template TryGet<void>(entry) == nullptr;
112 if (fDiscardEntryWithMissingValue) {
113 valueIsMissing ? ++fRejected[slot * cacheLineStepULong64_t] : ++fAccepted[slot * cacheLineStepULong64_t];
114 fLastResult[slot * cacheLineStepint] = !valueIsMissing;
115 } else {
116 valueIsMissing ? ++fAccepted[slot * cacheLineStepULong64_t] : ++fRejected[slot * cacheLineStepULong64_t];
117 fLastResult[slot * cacheLineStepint] = valueIsMissing;
118 }
119 }
120 fLastCheckedEntry[slot * cacheLineStepLong64_t] = entry;
121 }
122 return fLastResult[slot * cacheLineStepint];
123 }
124
125 void InitSlot(TTreeReader *r, unsigned int slot) final
126 {
127 fValues[slot] =
128 RDFInternal::GetColumnReader(slot, fColRegister.GetReaderUnchecked(slot, fColumnNames[0], fVariation),
129 *fLoopManager, r, fColumnNames[0], typeid(void));
130 fLastCheckedEntry[slot * RDFInternal::CacheLineStep<Long64_t>()] = -1;
131 }
132
133 // recursive chain of `Report`s
134 void Report(ROOT::RDF::RCutFlowReport &rep) const final { PartialReport(rep); }
135
137 {
138 fPrevNodePtr->PartialReport(rep);
139 FillReport(rep);
140 }
141
143 {
144 ++fNStopsReceived;
145 if (fNStopsReceived == fNChildren)
146 fPrevNodePtr->StopProcessing();
147 }
148
150 {
151 ++fNChildren;
152 // propagate "children activation" upstream. named filters do the propagation via `TriggerChildrenCount`.
153 if (fNChildren == 1 && fName.empty())
154 fPrevNodePtr->IncrChildrenCount();
155 }
156
158 {
159 assert(!fName.empty()); // this method is to only be called on named filters
160 fPrevNodePtr->IncrChildrenCount();
161 }
162
163 void AddFilterName(std::vector<std::string> &filters) final
164 {
165 fPrevNodePtr->AddFilterName(filters);
166 auto name = (HasName() ? fName : fDiscardEntryWithMissingValue ? "FilterAvailable" : "FilterMissing");
167 filters.push_back(name);
168 }
169
170 /// Clean-up operations to be performed at the end of a task.
171 void FinalizeSlot(unsigned int slot) final { fValues[slot] = nullptr; }
172
173 std::shared_ptr<RDFGraphDrawing::GraphNode>
174 GetGraph(std::unordered_map<void *, std::shared_ptr<RDFGraphDrawing::GraphNode>> &visitedMap) final
175 {
176 // Recursively call for the previous node.
177 auto prevNode = fPrevNodePtr->GetGraph(visitedMap);
178 const auto &prevColumns = prevNode->GetDefinedColumns();
179
181
182 /* If the returned node is not new, there is no need to perform any other operation.
183 * This is a likely scenario when building the entire graph in which branches share
184 * some nodes. */
185 if (!thisNode->IsNew()) {
186 return thisNode;
187 }
188
189 auto upmostNode = AddDefinesToGraph(thisNode, fColRegister, prevColumns, visitedMap);
190
191 // Keep track of the columns defined up to this point.
192 thisNode->AddDefinedColumns(fColRegister.GenerateColumnNames());
193
194 upmostNode->SetPrevNode(prevNode);
195 return thisNode;
196 }
197
198 /// Return a clone of this Filter that works with values in the variationName "universe".
199 std::shared_ptr<RNodeBase> GetVariedFilter(const std::string &variationName) final
200 {
201 // Only the nominal filter should be asked to produce varied filters
202 assert(fVariation == "nominal");
203 // nobody should ask for a varied filter for the nominal variation: they can just
204 // use the nominal filter!
205 assert(variationName != "nominal");
206 // nobody should ask for a varied filter for a variation on which this filter does not depend:
207 // they can just use the nominal filter.
209
210 auto it = fVariedFilters.find(variationName);
211 if (it != fVariedFilters.end())
212 return it->second;
213
214 auto prevNode = fPrevNodePtr;
215 if (static_cast<RNodeBase *>(fPrevNodePtr.get()) != static_cast<RNodeBase *>(fLoopManager) &&
216 RDFInternal::IsStrInVec(variationName, prevNode->GetVariations()))
217 prevNode = std::static_pointer_cast<PrevNode_t>(prevNode->GetVariedFilter(variationName));
218
219 // the varied filters get a copy of the callable object.
220 // TODO document this
221 auto variedFilter = std::unique_ptr<RFilterBase>(new RFilterWithMissingValues<PrevNode_t>(
222 fDiscardEntryWithMissingValue, std::move(prevNode), fColRegister, fColumnNames, fName, variationName));
223 auto e = fVariedFilters.insert({variationName, std::move(variedFilter)});
224 return e.first->second;
225 }
226};
227
228} // namespace ROOT::Detail::RDF
229
230#endif // ROOT_RDF_RFilterWithMissingValues
#define e(i)
Definition RSha256.hxx:103
long long Long64_t
Definition RtypesCore.h:69
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
const char * filters[]
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
char name[80]
Definition TGX11.cxx:110
implementation of FilterAvailable and FilterMissing operations
RFilterWithMissingValues & operator=(RFilterWithMissingValues &&)=delete
const std::shared_ptr< PrevNode_t > fPrevNodePtr
bool CheckFilters(unsigned int slot, Long64_t entry) final
void FinalizeSlot(unsigned int slot) final
Clean-up operations to be performed at the end of a task.
std::shared_ptr< RDFGraphDrawing::GraphNode > GetGraph(std::unordered_map< void *, std::shared_ptr< RDFGraphDrawing::GraphNode > > &visitedMap) final
std::conditional_t< std::is_same< PrevNodeRaw, RJittedFilter >::value, RFilterBase, PrevNodeRaw > PrevNode_t
std::shared_ptr< RNodeBase > GetVariedFilter(const std::string &variationName) final
Return a clone of this Filter that works with values in the variationName "universe".
RFilterWithMissingValues(const RFilterWithMissingValues &)=delete
void InitSlot(TTreeReader *r, unsigned int slot) final
void AddFilterName(std::vector< std::string > &filters) final
RFilterWithMissingValues & operator=(const RFilterWithMissingValues &)=delete
RFilterWithMissingValues(RFilterWithMissingValues &&)=delete
void PartialReport(ROOT::RDF::RCutFlowReport &rep) const final
void Report(ROOT::RDF::RCutFlowReport &rep) const final
RFilterWithMissingValues(bool discardEntry, std::shared_ptr< PrevNode_t > pd, const RDFInternal::RColumnRegister &colRegister, const ColumnNames_t &columns, std::string_view filterName="", const std::string &variationName="nominal")
Base class for non-leaf nodes of the computational graph.
Definition RNodeBase.hxx:43
A binder for user-defined columns, variations and aliases.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
std::shared_ptr< GraphNode > CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
std::shared_ptr< GraphNode > AddDefinesToGraph(std::shared_ptr< GraphNode > node, const RColumnRegister &colRegister, const std::vector< std::string > &prevNodeDefines, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
RDFDetail::RColumnReaderBase * GetColumnReader(unsigned int slot, RColumnReaderBase *defineOrVariationReader, RLoopManager &lm, TTreeReader *treeReader, std::string_view colName, const std::type_info &ti)
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:444