This tutorial illustrates the basic features of the RDataFrame class, a utility that lets users interact with data stored in TTrees through a functional, chain-like approach.
void fill_tree(const char *treeName, const char *fileName)
{
d.Define(
"b1", [](
ULong64_t entry) ->
double {
return entry; }, {
"rdfentry_"})
.
Define(
"b2", [](
ULong64_t entry) ->
int {
return entry * entry; }, {
"rdfentry_"})
}
{
auto fileName = "df001_introduction.root";
auto treeName = "myTree";
fill_tree(treeName, fileName);
auto cutb1 = [](double b1) { return b1 < 5.; };
auto cutb1b2 = [](int b2, double b1) { return b2 % 2 && b1 < 4.; };
auto entries1 =
d.Filter(cutb1)
.Filter(cutb1b2, {"b2", "b1"})
std::cout << *entries1 << " entries passed all filters" << std::endl;
auto entries2 =
d.Filter(
"b1 < 5.").Count();
std::cout << *entries2 << " entries passed the string filter" << std::endl;
auto b1b2_cut =
d.Filter(cutb1b2, {
"b2",
"b1"});
auto minVal = b1b2_cut.Min();
auto maxVal = b1b2_cut.Max();
auto meanVal = b1b2_cut.Mean();
auto nonDefmeanVal = b1b2_cut.Mean("b2");
std::cout << "The mean is always included between the min and the max: " << *minVal << " <= " << *meanVal
<< " <= " << *maxVal << std::endl;
auto b1_cut =
d.Filter(cutb1);
auto b1Vec = b1_cut.Take<double>();
auto b1List = b1_cut.Take<
double, std::list<double>>();
std::cout << "Selected b1 entries" << std::endl;
for (auto b1_entry : *b1List)
std::cout << b1_entry << " ";
std::cout << std::endl;
std::cout << "The type of b1Vec is " << b1VecCl->GetName() << std::endl;
auto hist =
d.Filter(cutb1).Histo1D();
std::cout << "Filled h " << hist->GetEntries() << " times, mean: " << hist->GetMean() << std::endl;
TH1F h(
"h",
"h", 12, -1, 11);
d.Filter([](
int b2) {
return b2 % 2 == 0; }, {
"b2"}).
Foreach([&
h](
double b1) {
h.Fill(b1); });
std::cout <<
"Filled h with " <<
h.GetEntries() <<
" entries" << std::endl;
auto cutb1_result =
d.Filter(cutb1);
auto cutb1b2_result =
d.Filter(cutb1b2, {
"b2",
"b1"});
auto cutb1_cutb1b2_result = cutb1_result.Filter(cutb1b2, {"b2", "b1"});
auto evts_cutb1_result = cutb1_result.Count();
auto evts_cutb1b2_result = cutb1b2_result.Count();
auto evts_cutb1_cutb1b2_result = cutb1_cutb1b2_result.Count();
std::cout << "Events passing cutb1: " << *evts_cutb1_result << std::endl
<< "Events passing cutb1b2: " << *evts_cutb1b2_result << std::endl
<< "Events passing both: " << *evts_cutb1_cutb1b2_result << std::endl;
auto entries_sum =
d.Define(
"sum", [](
double b1,
int b2) {
return b2 + b1; }, {
"b1",
"b2"})
std::cout << *entries_sum << std::endl;
auto entries_sum2 =
d.Define(
"sum2",
"b1 + b2").Filter(
"sum2 > 4.2").Count();
std::cout << *entries_sum2 << std::endl;
auto printEntrySlot = [](
ULong64_t iEntry,
unsigned int slot) {
std::cout << "Entry: " << iEntry << " Slot: " << slot << std::endl;
};
d.Foreach(printEntrySlot, {
"rdfentry_",
"rdfslot_"});
return 0;
}
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
unsigned long long ULong64_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, ...
1-D histogram with a float per channel (see TH1 documentation)
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
static uint64_t sum(uint64_t i)