Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROOT::RDF::RInterface< Proxied > Class Template Reference

template<typename Proxied>
class ROOT::RDF::RInterface< Proxied >

The public interface to the RDataFrame federation of classes.

Template Parameters
Proxied: One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.

The documentation of each method features a one-liner illustrating how to use the method, for example showing how the majority of the template parameters are automatically deduced, requiring little or no effort from the user.

Definition at line 124 of file RInterface.hxx.

Public Member Functions

 RInterface (const RInterface &)=default
 Copy-ctor for RInterface.
 
template<typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
 RInterface (const std::shared_ptr< RLoopManager > &proxied)
 Build a RInterface from a RLoopManager.
 
 RInterface (RInterface &&)=default
 Move-ctor for RInterface.
 
template<typename T>
RInterface< Proxied > DefaultValueFor (std::string_view column, const T &defaultValue)
 In case the value in the given column is missing, provide a default value.
 
template<typename F, typename std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< Proxied > Define (std::string_view name, F expression, const ColumnNames_t &columns={})
 Define a new column.
 
RInterface< Proxied > Define (std::string_view name, std::string_view expression)
 Define a new column.
 
template<typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
RInterface< Proxied > DefinePerSample (std::string_view name, F expression)
 Define a new column that is updated when the input sample changes.
 
RInterface< Proxied > DefinePerSample (std::string_view name, std::string_view expression)
 Define a new column that is updated when the input sample changes.
 
template<typename F>
RInterface< Proxied > DefineSlot (std::string_view name, F expression, const ColumnNames_t &columns={})
 Define a new column with a value dependent on the processing slot.
 
template<typename F>
RInterface< Proxied > DefineSlotEntry (std::string_view name, F expression, const ColumnNames_t &columns={})
 Define a new column with a value dependent on the processing slot and the current entry.
 
template<typename F, std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< RDFDetail::RFilter< F, Proxied > > Filter (F f, const ColumnNames_t &columns={}, std::string_view name="")
 Append a filter to the call graph.
 
template<typename F>
RInterface< RDFDetail::RFilter< F, Proxied > > Filter (F f, const std::initializer_list< std::string > &columns)
 Append a filter to the call graph.
 
template<typename F, std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< RDFDetail::RFilter< F, Proxied > > Filter (F f, std::string_view name)
 Append a filter to the call graph.
 
RInterface< RDFDetail::RJittedFilter > Filter (std::string_view expression, std::string_view name="")
 Append a filter to the call graph.
 
RInterface< RDFDetail::RFilterWithMissingValues< Proxied > > FilterAvailable (std::string_view column)
 Discard entries with missing values.
 
RInterface< RDFDetail::RFilterWithMissingValues< Proxied > > FilterMissing (std::string_view column)
 Keep only the entries that have missing values.
 
 operator RNode () const
 Cast any RDataFrame node to a common type ROOT::RDF::RNode.
 
RInterface & operator= (const RInterface &)=default
 Copy-assignment operator for RInterface.
 
RInterface & operator= (RInterface &&)=default
 Move-assignment operator for RInterface.
 
template<typename F, std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< Proxied > Redefine (std::string_view name, F expression, const ColumnNames_t &columns={})
 Overwrite the value and/or type of an existing column.
 
RInterface< Proxied > Redefine (std::string_view name, std::string_view expression)
 Overwrite the value and/or type of an existing column.
 
template<typename F>
RInterface< Proxied > RedefineSlot (std::string_view name, F expression, const ColumnNames_t &columns={})
 Overwrite the value and/or type of an existing column.
 
template<typename F>
RInterface< Proxied > RedefineSlotEntry (std::string_view name, F expression, const ColumnNames_t &columns={})
 Overwrite the value and/or type of an existing column.
 
template<typename F>
RInterface< Proxied > Vary (const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
 Register systematic variations for multiple existing columns using custom variation tags.
 
template<typename F>
RInterface< Proxied > Vary (std::initializer_list< std::string > colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
 Register systematic variations for multiple existing columns using custom variation tags.
 
template<typename F>
RInterface< Proxied > Vary (std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
 Register systematic variations for a single existing column using custom variation tags.
 
template<typename F>
RInterface< Proxied > Vary (std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
 Register systematic variations for a single existing column using auto-generated variation tags.
 
- Public Member Functions inherited from ROOT::RDF::RInterfaceBase
 RInterfaceBase (RDFDetail::RLoopManager &lm, const RDFInternal::RColumnRegister &colRegister)
 
 RInterfaceBase (std::shared_ptr< RDFDetail::RLoopManager > lm)
 
RDFDescription Describe ()
 Return information about the dataframe.
 
ColumnNames_t GetColumnNames ()
 Returns the names of the available columns.
 
std::string GetColumnType (std::string_view column)
 Return the type of a given column as a string.
 
ColumnNames_t GetDatasetTopLevelFieldNames ()
 Retrieve the names of the top-level fields of the dataset.
 
ColumnNames_t GetDefinedColumnNames ()
 Returns the names of the defined columns.
 
unsigned int GetNFiles ()
 
unsigned int GetNRuns () const
 Gets the number of event loops run.
 
unsigned int GetNSlots () const
 Gets the number of data processing slots.
 
RVariationsDescription GetVariations () const
 Return a descriptor for the systematic variations registered in this branch of the computation graph.
 
bool HasColumn (std::string_view columnName)
 Checks if a column is present in the dataset.
 

Public Attributes

 true
 Register systematic variations for multiple existing columns using auto-generated tags.
 

Private Types

using RFilterBase = RDFDetail::RFilterBase
 
using RLoopManager = RDFDetail::RLoopManager
 
using RRangeBase = RDFDetail::RRangeBase
 

Private Attributes

std::shared_ptr< Proxied > fProxiedPtr
 Smart pointer to the graph node encapsulated by this RInterface.
 

Friends

template<typename T>
class RInterface
 
void RDFInternal::ChangeBeginAndEndEntries (const RNode &node, Long64_t start, Long64_t end)
 
void RDFInternal::ChangeEmptyEntryRange (const RNode &node, std::pair< ULong64_t, ULong64_t > &&newRange)
 
void RDFInternal::ChangeSpec (const RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec)
 
std::vector< std::pair< std::uint64_t, std::uint64_t > > RDFInternal::GetDatasetGlobalClusterBoundaries (const RNode &node)
 
class RDFInternal::GraphDrawing::GraphCreatorHelper
 
void RDFInternal::TriggerRun (RNode node)
 
std::string ROOT::Internal::RDF::GetDataSourceLabel (const RNode &node)
 
void ROOT::Internal::RDF::SetTTreeLifeline (ROOT::RDF::RNode &node, std::any lifeline)
 

Additional Inherited Members

- Protected Member Functions inherited from ROOT::RDF::RInterfaceBase
void AddDefaultColumns ()
 
template<typename... ColumnTypes>
void CheckAndFillDSColumns (ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
 
void CheckAndFillDSColumns (const std::vector< std::string > &colNames, const std::vector< const std::type_info * > &colTypeIDs)
 
void CheckIMTDisabled (std::string_view callerName)
 
template<typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode, typename HelperArgType = ActionResultType, std::enable_if_t< RDFInternal::RNeedJitting< ColTypes... >::value, int > = 0>
RResultPtr< ActionResultType > CreateAction (const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int nColumns=-1, const bool vector2RVec=true)
 Create RAction object, return RResultPtr for the action. Overload for the case in which one or more column types were not specified (RTTI + jitting).
 
template<typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode, typename HelperArgType = ActionResultType, std::enable_if_t<!RDFInternal::RNeedJitting< ColTypes... >::value, int > = 0>
RResultPtr< ActionResultType > CreateAction (const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
 Create RAction object, return RResultPtr for the action. Overload for the case in which all column types were specified (no jitting).
 
std::string DescribeDataset () const
 
ColumnNames_t GetColumnTypeNamesList (const ColumnNames_t &columnList)
 
RDataSource * GetDataSource () const
 
RDFDetail::RLoopManager * GetLoopManager () const
 
ColumnNames_t GetValidatedColumnNames (const unsigned int nColumns, const ColumnNames_t &columns)
 
template<typename RetType>
void SanityChecksForVary (const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
 
- Protected Attributes inherited from ROOT::RDF::RInterfaceBase
RDFInternal::RColumnRegister fColRegister
 Contains the columns defined up to this node.
 
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
 The RLoopManager at the root of this computation graph. Never null.
 

#include <ROOT/RDF/RInterface.hxx>

Inheritance diagram for ROOT::RDF::RInterface< Proxied >:
[legend]

Member Typedef Documentation

◆ RFilterBase

template<typename Proxied>
using ROOT::RDF::RInterface< Proxied >::RFilterBase = RDFDetail::RFilterBase
private

Definition at line 125 of file RInterface.hxx.

◆ RLoopManager

template<typename Proxied>
using ROOT::RDF::RInterface< Proxied >::RLoopManager = RDFDetail::RLoopManager
private

Definition at line 127 of file RInterface.hxx.

◆ RRangeBase

template<typename Proxied>
using ROOT::RDF::RInterface< Proxied >::RRangeBase = RDFDetail::RRangeBase
private

Definition at line 126 of file RInterface.hxx.

Constructor & Destructor Documentation

◆ RInterface() [1/3]

template<typename Proxied>
ROOT::RDF::RInterface< Proxied >::RInterface ( const RInterface< Proxied > & )
default

Copy-ctor for RInterface.

◆ RInterface() [2/3]

template<typename Proxied>
ROOT::RDF::RInterface< Proxied >::RInterface ( RInterface< Proxied > && )
default

Move-ctor for RInterface.

◆ RInterface() [3/3]

template<typename Proxied>
template<typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
ROOT::RDF::RInterface< Proxied >::RInterface ( const std::shared_ptr< RLoopManager > & proxied)
inline

Build a RInterface from a RLoopManager.

This constructor is only available for RInterface<RLoopManager>.

Definition at line 165 of file RInterface.hxx.

Member Function Documentation

◆ DefaultValueFor()

template<typename Proxied>
template<typename T>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::DefaultValueFor ( std::string_view column,
const T & defaultValue )
inline

In case the value in the given column is missing, provide a default value.

Template Parameters
T: The type of the column
Parameters
[in] column: Column name where missing values should be replaced by the given default value
[in] defaultValue: Value to provide instead of a missing value
Returns
The node of the graph that will provide a default value

This operation is useful in case an entry of the dataset is incomplete, i.e. if one or more of the columns do not have valid values. It does not modify the values of the column, but in case any entry is missing, it will provide the default value to downstream nodes instead.

Use cases include:

  • When processing multiple files, one or more of them is missing a column
  • In horizontal joining with entry matching, a certain dataset has no match for the current entry.

Example usage:

// Assume a dataset with columns [idx, x] matching another dataset with
// columns [idx, y]. For idx == 42, the right-hand dataset has no match
ROOT::RDataFrame df{dataset};
auto df_default = df.DefaultValueFor("y", 33)
.Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
auto colz = df_default.Take<int>("z");
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
df = ROOT.RDataFrame(dataset)
df_default = df.DefaultValueFor("y", 33).Define("z", "x + y")
colz = df_default.Take[int]("z")
RInterface< Proxied > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.

Definition at line 679 of file RInterface.hxx.

◆ Define() [1/2]

template<typename Proxied>
template<typename F, typename std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::Define ( std::string_view name,
F expression,
const ColumnNames_t & columns = {} )
inline

Define a new column.

Parameters
[in] name: The name of the defined column.
[in] expression: Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. This callable must be thread safe when used with multiple threads.
[in] columns: Names of the columns/branches in input to the producer function.
Returns
the first node of the computation graph for which the new quantity is defined.

Define a column that will be visible from all subsequent nodes of the functional chain. The expression is only evaluated for entries that pass all the preceding filters. A new variable is created called name, accessible as if it was contained in the dataset from subsequent transformations/actions.

Use cases include:

  • caching the results of complex calculations for easy and efficient multiple access
  • extraction of quantities of interest from complex objects

An exception is thrown if the name of the new column is already in use in this branch of the computation graph. Note that the callable must be thread safe when called from multiple threads. Use DefineSlot() if needed.

Example usage:

// assuming a function with signature:
double myComplexCalculation(const RVec<float> &muon_pts);
// we can pass it directly to Define
auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
// alternatively, we can pass the body of the function as a string, as in Filter:
auto df_with_define = df.Define("newColumn", "x*x + y*y");
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1524
Note
If the body of the string expression contains an explicit return statement (even if it is in a nested scope), RDataFrame will not add another one in front of the expression. So this will not work:
df.Define("x2", "Map(v, [](float e) { return e*e; })")
but instead this will:
df.Define("x2", "return Map(v, [](float e) { return e*e; })")

Definition at line 450 of file RInterface.hxx.

◆ Define() [2/2]

template<typename Proxied>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::Define ( std::string_view name,
std::string_view expression )
inline

Define a new column.

Parameters
[in] name: The name of the defined column.
[in] expression: An expression in C++ which represents the defined value
Returns
the first node of the computation graph for which the new quantity is defined.

The expression is just-in-time compiled and used to produce the column entries. It must be valid C++ syntax in which variable names are substituted with the names of branches/columns.

Note
If the body of the string expression contains an explicit return statement (even if it is in a nested scope), RDataFrame will not add another one in front of the expression. So this will not work:
df.Define("x2", "Map(v, [](float e) { return e*e; })")
but instead this will:
df.Define("x2", "return Map(v, [](float e) { return e*e; })")

Refer to the first overload of this method for the full documentation.

Definition at line 539 of file RInterface.hxx.

◆ DefinePerSample() [1/2]

template<typename Proxied>
template<typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::DefinePerSample ( std::string_view name,
F expression )
inline

Define a new column that is updated when the input sample changes.

Parameters
[in] name: The name of the defined column.
[in] expression: A C++ callable that computes the new value of the defined column.
Returns
the first node of the computation graph for which the new quantity is defined.

The signature of the callable passed as second argument should be T(unsigned int slot, const ROOT::RDF::RSampleInfo &id) where:

  • T is the type of the defined column
  • slot is a number in the range [0, nThreads) that is different for each processing thread. This can simplify the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
  • id is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is being processed (see the class docs for more information).

DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being processed or to inject a callback into the event loop that is only called when the processing of a new sample starts rather than at every entry.

The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.

Example usage:

ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
df.DefinePerSample("weightbysample",
[](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
{ return id.Contains("sample1") ? 1.0f : 2.0f; });
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...

Definition at line 742 of file RInterface.hxx.

◆ DefinePerSample() [2/2]

template<typename Proxied>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::DefinePerSample ( std::string_view name,
std::string_view expression )
inline

Define a new column that is updated when the input sample changes.

Parameters
[in] name: The name of the defined column.
[in] expression: A valid C++ expression as a string, which will be used to compute the defined value.
Returns
the first node of the computation graph for which the new quantity is defined.

The expression is just-in-time compiled and used to produce the column entries. It must be valid C++ syntax and the usage of the special variable names rdfslot_ and rdfsampleinfo_ is permitted, where these variables will take the same values as the slot and id parameters described at the DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.

Example usage:

df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
Note
If you have declared some C++ function to the interpreter, the correct syntax to call that function with this overload of DefinePerSample is by calling it explicitly with the special names rdfslot_ and rdfsampleinfo_ as input parameters. This is for example the correct way to call this overload when working in PyROOT:
ROOT.gInterpreter.Declare(
"""
float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
return id.Contains("sample1") ? 1.0f : 2.0f;
}
""")
df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain column names other than those mentioned above: the expression is evaluated once before the processing of the sample even starts, so column values are not accessible.

Definition at line 803 of file RInterface.hxx.

◆ DefineSlot()

template<typename Proxied>
template<typename F>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::DefineSlot ( std::string_view name,
F expression,
const ColumnNames_t & columns = {} )
inline

Define a new column with a value dependent on the processing slot.

Parameters
[in] name: The name of the defined column.
[in] expression: Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in] columns: Names of the columns/branches in input to the producer function (excluding the slot number).
Returns
the first node of the computation graph for which the new quantity is defined.

This alternative implementation of Define is meant as a helper to evaluate new column values in a thread-safe manner. The expression must be a callable of signature R(unsigned int, T1, T2, ...) where T1, T2... are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. Note that there is no guarantee as to how often each slot will be reached during the event loop.

The following two calls are equivalent, although DefineSlot is slightly more performant:

int function(unsigned int, double, double);
df.Define("x", function, {"rdfslot_", "column1", "column2"})
df.DefineSlot("x", function, {"column1", "column2"})

See Define() for more information.

Definition at line 480 of file RInterface.hxx.

◆ DefineSlotEntry()

template<typename Proxied>
template<typename F>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::DefineSlotEntry ( std::string_view name,
F expression,
const ColumnNames_t & columns = {} )
inline

Define a new column with a value dependent on the processing slot and the current entry.

Parameters
[in] name: The name of the defined column.
[in] expression: Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in] columns: Names of the columns/branches in input to the producer function (excluding slot and entry).
Returns
the first node of the computation graph for which the new quantity is defined.

This alternative implementation of Define is meant as a helper in writing entry-specific, thread-safe custom columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where T1, T2... are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. Note that there is no guarantee as to how often each slot will be reached during the event loop. The second parameter is reserved for a ULong64_t representing the current entry being processed by the current thread.

The following two Defines are equivalent, although DefineSlotEntry is slightly more performant:

int function(unsigned int, ULong64_t, double, double);
Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
DefineSlotEntry("x", function, {"column1", "column2"})
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
Definition RtypesCore.h:84
RInterface< Proxied > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
RInterface< Proxied > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.

See Define() for more information.

Definition at line 511 of file RInterface.hxx.

◆ Filter() [1/4]

template<typename Proxied>
template<typename F, std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< RDFDetail::RFilter< F, Proxied > > ROOT::RDF::RInterface< Proxied >::Filter ( F f,
const ColumnNames_t & columns = {},
std::string_view name = "" )
inline

Append a filter to the call graph.

Parameters
[in] f: Function, lambda expression, functor class or any other callable object. It must return a bool signalling whether the event has passed the selection (true) or not (false).
[in] columns: Names of the columns/branches in input to the filter function.
[in] name: Optional name of this filter. See Report.
Returns
the filter node of the computation graph.

Append a filter node at the point of the call graph corresponding to the object this method is called on. The callable f should not have side-effects (e.g. modification of an external or static variable) to ensure correct results when implicit multi-threading is active.

RDataFrame only evaluates filters when necessary: if multiple filters are chained one after another, they are executed in order and the first one returning false causes the event to be discarded. Even if multiple actions or transformations depend on the same filter, it is executed once per entry. If its result is requested more than once, the cached result is served.

Example usage:

// C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
auto filtered = df.Filter(myCut, {"x", "y"});
// String: it must contain valid C++ except that column names can be used instead of variable names
auto filtered = df.Filter("x*y > 0");
Note
If the body of the string expression contains an explicit return statement (even if it is in a nested scope), RDataFrame will not add another one in front of the expression. So this will not work:
df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
but instead this will:
df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")

Definition at line 232 of file RInterface.hxx.

◆ Filter() [2/4]

template<typename Proxied>
template<typename F>
RInterface< RDFDetail::RFilter< F, Proxied > > ROOT::RDF::RInterface< Proxied >::Filter ( F f,
const std::initializer_list< std::string > & columns )
inline

Append a filter to the call graph.

Parameters
[in] f: Function, lambda expression, functor class or any other callable object. It must return a bool signalling whether the event has passed the selection (true) or not (false).
[in] columns: Names of the columns/branches in input to the filter function.
Returns
the filter node of the computation graph.

Refer to the first overload of this method for the full documentation.

Definition at line 271 of file RInterface.hxx.

◆ Filter() [3/4]

template<typename Proxied>
template<typename F, std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< RDFDetail::RFilter< F, Proxied > > ROOT::RDF::RInterface< Proxied >::Filter ( F f,
std::string_view name )
inline

Append a filter to the call graph.

Parameters
[in] f: Function, lambda expression, functor class or any other callable object. It must return a bool signalling whether the event has passed the selection (true) or not (false).
[in] name: Optional name of this filter. See Report.
Returns
the filter node of the computation graph.

Refer to the first overload of this method for the full documentation.

Definition at line 255 of file RInterface.hxx.

◆ Filter() [4/4]

template<typename Proxied>
RInterface< RDFDetail::RJittedFilter > ROOT::RDF::RInterface< Proxied >::Filter ( std::string_view expression,
std::string_view name = "" )
inline

Append a filter to the call graph.

Parameters
[in] expression: The filter expression in C++
[in] name: Optional name of this filter. See Report.
Returns
the filter node of the computation graph.

The expression is just-in-time compiled and used to filter entries. It must be valid C++ syntax in which variable names are substituted with the names of branches/columns.

Example usage:

auto filtered_df = df.Filter("myCollection.size() > 3");
auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
Note
If the body of the string expression contains an explicit return statement (even if it is in a nested scope), RDataFrame will not add another one in front of the expression. So this will not work:
df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
but instead this will:
df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")

Definition at line 301 of file RInterface.hxx.

◆ FilterAvailable()

template<typename Proxied>
RInterface< RDFDetail::RFilterWithMissingValues< Proxied > > ROOT::RDF::RInterface< Proxied >::FilterAvailable ( std::string_view column)
inline

Discard entries with missing values.

Parameters
[in] column: Column name whose entries with missing values should be discarded
Returns
The filter node of the computation graph

This operation is useful in case an entry of the dataset is incomplete, i.e. if one or more of the columns do not have valid values. If the value of the input column is missing for an entry, the entire entry will be discarded from the rest of this branch of the computation graph.

Use cases include:

  • When processing multiple files, one or more of them is missing a column
  • In horizontal joining with entry matching, a certain dataset has no match for the current entry.

Example usage:

# Assume a dataset with columns [idx, x] matching another dataset with
# columns [idx, y]. For idx == 42, the right-hand dataset has no match
df = ROOT.RDataFrame(dataset)
df_nomissing = df.FilterAvailable("idx").Define("z", "x + y")
colz = df_nomissing.Take[int]("z")
// Assume a dataset with columns [idx, x] matching another dataset with
// columns [idx, y]. For idx == 42, the right-hand dataset has no match
ROOT::RDataFrame df{dataset};
auto df_nomissing = df.FilterAvailable("idx")
.Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
auto colz = df_nomissing.Take<int>("z");
Note
See FilterMissing() if you want to keep only the entries with missing values instead.

Definition at line 345 of file RInterface.hxx.

◆ FilterMissing()

template<typename Proxied>
RInterface< RDFDetail::RFilterWithMissingValues< Proxied > > ROOT::RDF::RInterface< Proxied >::FilterMissing ( std::string_view column)
inline

Keep only the entries that have missing values.

Parameters
[in] column: Column name whose entries with missing values should be kept
Returns
The filter node of the computation graph

This operation is useful in case an entry of the dataset is incomplete, i.e. if one or more of the columns do not have valid values. It only keeps the entries for which the value of the input column is missing.

Use cases include:

  • When processing multiple files, one or more of them is missing a column
  • In horizontal joining with entry matching, a certain dataset has no match for the current entry.

Example usage:

# Assume a dataset made of two files vertically chained together, one has
# column "x" and the other has column "y"
df = ROOT.RDataFrame(dataset)
df_valid_col_x = df.FilterMissing("y")
df_valid_col_y = df.FilterMissing("x")
display_x = df_valid_col_x.Display(("x",))
display_y = df_valid_col_y.Display(("y",))
// Assume a dataset made of two files vertically chained together, one has
// column "x" and the other has column "y"
ROOT::RDataFrame df{dataset};
auto df_valid_col_x = df.FilterMissing("y");
auto df_valid_col_y = df.FilterMissing("x");
auto display_x = df_valid_col_x.Display<int>({"x"});
auto display_y = df_valid_col_y.Display<int>({"y"});
Note
See FilterAvailable() if you want to discard the entries in case there is a missing value instead.

Definition at line 396 of file RInterface.hxx.

◆ operator RNode()

template<typename Proxied>
ROOT::RDF::RInterface< Proxied >::operator RNode ( ) const
inline

Cast any RDataFrame node to a common type ROOT::RDF::RNode.

Different RDataFrame methods return different C++ types. All nodes, however, can be cast to this common type at the cost of a small performance penalty. This allows, for example, storing RDataFrame nodes in a vector, or passing them around via (non-template, C++11) helper functions. Example usage:

// a function that conditionally adds a Range to a RDataFrame node.
RNode MaybeAddRange(RNode df, bool mustAddRange)
{
return mustAddRange ? df.Range(1) : df;
}
// use as :
auto maybeRanged = MaybeAddRange(df, true);
RInterface<::ROOT::Detail::RDF::RNodeBase > RNode

Note that it is not a problem to pass RNode's by value.

Definition at line 187 of file RInterface.hxx.

◆ operator=() [1/2]

template<typename Proxied>
RInterface & ROOT::RDF::RInterface< Proxied >::operator= ( const RInterface< Proxied > & )
default

Copy-assignment operator for RInterface.

◆ operator=() [2/2]

template<typename Proxied>
RInterface & ROOT::RDF::RInterface< Proxied >::operator= ( RInterface< Proxied > && )
default

Move-assignment operator for RInterface.

◆ Redefine() [1/2]

template<typename Proxied>
template<typename F, std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::Redefine ( std::string_view name,
F expression,
const ColumnNames_t & columns = {} )
inline

Overwrite the value and/or type of an existing column.

Parameters
[in]nameThe name of the column to redefine.
[in]expressionFunction, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in]columnsNames of the columns/branches in input to the expression.
Returns
the first node of the computation graph for which the quantity is redefined.

The old value of the column can be used as an input for the expression.

An exception is thrown in case the column to redefine does not already exist. See Define() for more information.

Definition at line 569 of file RInterface.hxx.

◆ Redefine() [2/2]

template<typename Proxied>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::Redefine ( std::string_view name,
std::string_view expression )
inline

Overwrite the value and/or type of an existing column.

Parameters
[in]nameThe name of the column to redefine.
[in]expressionAn expression in C++ which represents the defined value
Returns
the first node of the computation graph for which the new quantity is defined.

The expression is just-in-time compiled and used to produce the column entries. It must be valid C++ syntax in which variable names are substituted with the names of branches/columns.

The old value of the column can be used as an input for the expression. An exception is thrown in case the column to re-define does not already exist.

Aliases cannot be overridden. See the corresponding Define() overload for more information.

Definition at line 627 of file RInterface.hxx.

◆ RedefineSlot()

template<typename Proxied>
template<typename F>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::RedefineSlot ( std::string_view name,
F expression,
const ColumnNames_t & columns = {} )
inline

Overwrite the value and/or type of an existing column.

Parameters
[in]nameThe name of the column to redefine.
[in]expressionFunction, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in]columnsNames of the columns/branches in input to the producer function (excluding slot).
Returns
the first node of the computation graph for which the new quantity is defined.

The old value of the column can be used as an input for the expression. An exception is thrown in case the column to redefine does not already exist.

See DefineSlot() for more information.

Definition at line 588 of file RInterface.hxx.

◆ RedefineSlotEntry()

template<typename Proxied>
template<typename F>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::RedefineSlotEntry ( std::string_view name,
F expression,
const ColumnNames_t & columns = {} )
inline

Overwrite the value and/or type of an existing column.

Parameters
[in]nameThe name of the column to redefine.
[in]expressionFunction, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in]columnsNames of the columns/branches in input to the producer function (excluding slot and entry).
Returns
the first node of the computation graph for which the new quantity is defined.

The old value of the column can be used as an input for the expression. An exception is thrown in case the column to re-define does not already exist.

See DefineSlotEntry() for more information.

Definition at line 607 of file RInterface.hxx.

◆ Vary() [1/4]

template<typename Proxied>
template<typename F>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::Vary ( const std::vector< std::string > & colNames,
F && expression,
const ColumnNames_t & inputColumns,
const std::vector< std::string > & variationTags,
std::string_view variationName )
inline

Register systematic variations for multiple existing columns using custom variation tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]variationTagsnames for each of the varied values, e.g. "up" and "down".
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation"

This overload of Vary takes a list of column names as first argument and requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each affected column. The variationTags are defined as {"down", "up"}.

Example usage:

// produce variations "ptAndEta:down" and "ptAndEta:up"
auto nominal_hx =
df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
[](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
{"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
{"down", "up"}, // variation tags
"ptAndEta") // variation name
.Histo1D("pt", "eta");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["ptAndEta:down"].Draw("SAME");
hx["ptAndEta:up"].Draw("SAME");
TPaveText * pt
RResultMap< T > VariationsFor(RResultPtr< T > resPtr)
Produce all required systematic variations for the given result.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
Note
See also the single-column Vary() overload taking custom variation tags (Vary() [3/4]) for more information.

Definition at line 954 of file RInterface.hxx.

◆ Vary() [2/4]

template<typename Proxied>
template<typename F>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::Vary ( std::initializer_list< std::string > colNames,
F && expression,
const ColumnNames_t & inputColumns,
const std::vector< std::string > & variationTags,
std::string_view variationName )
inline

Register systematic variations for multiple existing columns using custom variation tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]variationTagsnames for each of the varied values, e.g. "up" and "down".
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.
Note
This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list is avoided.
See also the single-column Vary() overload taking custom variation tags (Vary() [3/4]) for more information.

Definition at line 976 of file RInterface.hxx.

◆ Vary() [3/4]

template<typename Proxied>
template<typename F>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::Vary ( std::string_view colName,
F && expression,
const ColumnNames_t & inputColumns,
const std::vector< std::string > & variationTags,
std::string_view variationName = "" )
inline

Register systematic variations for a single existing column using custom variation tags.

Parameters
[in]colNamename of the column for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]variationTagsnames for each of the varied values, e.g. "up" and "down".
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation".

Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for results that depend on any varied quantity, a map/dictionary of varied results can be produced with ROOT::RDF::Experimental::VariationsFor (see the example below).

The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and values for each of the systematic variations that affected the result (via upstream Filters or via direct or indirect dependencies of the column values on some registered variations). The keys will be a composition of variation names and tags, e.g. "pt:up" and "pt:down" for the example below.

In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt. We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):

auto nominal_hx =
df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, {"down", "up"})
.Filter("pt > k")
.Define("x", someFunc, {"pt"})
.Histo1D("x");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["pt:down"].Draw("SAME");
hx["pt:up"].Draw("SAME");
RInterface< RDFDetail::RFilter< F, Proxied > > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
ROOT::VecOps::RVec< double > RVecD
Definition RVec.hxx:3789

RDataFrame computes all variations as part of a single loop over the data. In particular, this means that I/O and computation of values shared among variations only happen once for all variations. Thus, the event loop run-time typically scales much better than linearly with the number of variations.

RDataFrame lazily computes the varied values required to produce the outputs of VariationsFor(). If VariationsFor() was not called for a result, the computations are only run for the nominal case.

See other overloads for examples when variations are added for multiple existing columns, or when the tags are auto-generated instead of being directly defined.

Definition at line 867 of file RInterface.hxx.

◆ Vary() [4/4]

template<typename Proxied>
template<typename F>
RInterface< Proxied > ROOT::RDF::RInterface< Proxied >::Vary ( std::string_view colName,
F && expression,
const ColumnNames_t & inputColumns,
std::size_t nVariations,
std::string_view variationName = "" )
inline

Register systematic variations for a single existing column using auto-generated variation tags.

Parameters
[in]colNamename of the column for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]nVariationsnumber of variations returned by the expression. The corresponding tags will be "0", "1", etc.
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.

This overload of Vary takes an nVariations parameter instead of a list of tag names. The varied results will be accessible via the keys of the dictionary with the form variationName:N where N is the corresponding sequential tag starting at 0 and going up to nVariations - 1.

Example usage:

auto nominal_hx =
df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, 2)
.Histo1D("x");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["pt:0"].Draw("SAME");
hx["pt:1"].Draw("SAME");
Note
See also the single-column Vary() overload taking custom variation tags (Vary() [3/4]) for more information.

Definition at line 906 of file RInterface.hxx.

Friends And Related Symbol Documentation

◆ RInterface

template<typename Proxied>
template<typename T>
friend class RInterface
friend

Definition at line 132 of file RInterface.hxx.

◆ RDFInternal::ChangeBeginAndEndEntries

template<typename Proxied>
void RDFInternal::ChangeBeginAndEndEntries ( const RNode & node,
Long64_t start,
Long64_t end )
friend

◆ RDFInternal::ChangeEmptyEntryRange

template<typename Proxied>
void RDFInternal::ChangeEmptyEntryRange ( const RNode & node,
std::pair< ULong64_t, ULong64_t > && newRange )
friend

◆ RDFInternal::ChangeSpec

template<typename Proxied>
void RDFInternal::ChangeSpec ( const RNode & node,
ROOT::RDF::Experimental::RDatasetSpec && spec )
friend

◆ RDFInternal::GetDatasetGlobalClusterBoundaries

template<typename Proxied>
std::vector< std::pair< std::uint64_t, std::uint64_t > > RDFInternal::GetDatasetGlobalClusterBoundaries ( const RNode & node)
friend

◆ RDFInternal::GraphDrawing::GraphCreatorHelper

template<typename Proxied>
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
friend

Definition at line 129 of file RInterface.hxx.

◆ RDFInternal::TriggerRun

template<typename Proxied>
void RDFInternal::TriggerRun ( RNode node)
friend

◆ ROOT::Internal::RDF::GetDataSourceLabel

template<typename Proxied>
std::string ROOT::Internal::RDF::GetDataSourceLabel ( const RNode & node)
friend

◆ ROOT::Internal::RDF::SetTTreeLifeline

template<typename Proxied>
void ROOT::Internal::RDF::SetTTreeLifeline ( ROOT::RDF::RNode & node,
std::any lifeline )
friend

Member Data Documentation

◆ fProxiedPtr

template<typename Proxied>
std::shared_ptr<Proxied> ROOT::RDF::RInterface< Proxied >::fProxiedPtr
private

Smart pointer to the graph node encapsulated by this RInterface.

Definition at line 142 of file RInterface.hxx.

◆ true

template<typename Proxied>
ROOT::RDF::RInterface< Proxied >::true

Register systematic variations for multiple existing columns using auto-generated tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]nVariationsnumber of variations returned by the expression. The corresponding tags will be "0", "1", etc.
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.

This overload of Vary takes a list of column names as first argument. It takes an nVariations parameter instead of a list of tag names (variationTags). Tag names will be auto-generated as the sequence 0...nVariations-1.

Example usage:

auto nominal_hx =
df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
[](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
{"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
2, // auto-generated variation tags
"ptAndEta") // variation name
.Histo1D("pt", "eta");

auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["ptAndEta:0"].Draw("SAME");
hx["ptAndEta:1"].Draw("SAME");

Note
See also This Vary() overload for more information. template <typename F> RInterface<Proxied> Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName) { R__ASSERT(nVariations > 0 && "Must have at least one variation.");

std::vector<std::string> variationTags; variationTags.reserve(nVariations); for (std::size_t i = 0u; i < nVariations; ++i) variationTags.emplace_back(std::to_string(i));

return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName); }

Register systematic variations for multiple existing columns using auto-generated variation tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]nVariationsnumber of variations returned by the expression. The corresponding tags will be "0", "1", etc.
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.
Note
This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list is avoided.
See also This Vary() overload for more information. template <typename F> RInterface<Proxied> Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName) { return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName); }

Register systematic variations for a single existing column using custom variation tags.

Parameters
[in]colNamename of the column for which varied values are provided.
[in]expressiona string containing valid C++ code that evaluates to an RVec containing the varied values for the specified column.
[in]variationTagsnames for each of the varied values, e.g. "up" and "down".
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.

This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are defined as {"down", "up"}.

auto nominal_hx =
df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
.Filter("pt > k")
.Define("x", someFunc, {"pt"})
.Histo1D("x");

auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["pt:down"].Draw("SAME");
hx["pt:up"].Draw("SAME");

Short-hand expression syntax

For convenience, when a C++ expression is passed to Vary, the return type can be omitted if the string begins with '{' and ends with '}' (whitespace, tab and newline characters are excluded from the search). This means that the following is equivalent to the example above:

auto nominal_hx =
df.Vary("pt", "{pt*0.9, pt*1.1}", {"down", "up"})
// Same as above
Note
See also This Vary() overload for more information. RInterface<Proxied> Vary(std::string_view colName, std::string_view expression, const std::vector<std::string> &variationTags, std::string_view variationName = "") { std::vector<std::string> colNames{{std::string(colName)}}; const std::string theVariationName{variationName.empty() ? colName : variationName};

return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=

Definition at line 1094 of file RInterface.hxx.

  • tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx
  • tree/dataframe/inc/ROOT/RDF/RInterface.hxx