Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df037_TTreeEventMatching.py File Reference

Detailed Description

View in nbviewer Open in SWAN

This example shows processing of a TTree-based dataset with horizontal concatenations (friends) and event matching (based on TTreeIndex). In case the current event being processed does not match one (or more) of the friend datasets, one can use the FilterAvailable and DefaultValueFor functionalities to act upon the situation.

import array
import os
import ROOT
"""A helper class to create the dataset for the tutorial below."""
main_file = "df037_TTreeEventMatching_py_main.root"
aux_file_1 = "df037_TTreeEventMatching_py_aux_1.root"
aux_file_2 = "df037_TTreeEventMatching_py_aux_2.root"
main_tree_name = "events"
aux_tree_name_1 = "auxdata_1"
aux_tree_name_2 = "auxdata_2"
def __init__(self):
with ROOT.TFile(self.main_file, "RECREATE"):
main_tree = ROOT.TTree(self.main_tree_name, self.main_tree_name)
idx = array.array("i", [0]) # any array can also be a numpy array
x = array.array("i", [0])
main_tree.Branch("idx", idx, "idx/I")
main_tree.Branch("x", x, "x/I")
idx[0] = 1
x[0] = 1
idx[0] = 2
x[0] = 2
idx[0] = 3
x[0] = 3
# The first auxiliary file has matching indices 1 and 2, but not 3
with ROOT.TFile(self.aux_file_1, "RECREATE"):
aux_tree_1 = ROOT.TTree(self.aux_tree_name_1, self.aux_tree_name_1)
idx = array.array("i", [0]) # any array can also be a numpy array
y = array.array("i", [0])
aux_tree_1.Branch("idx", idx, "idx/I")
aux_tree_1.Branch("y", y, "y/I")
idx[0] = 1
y[0] = 4
idx[0] = 2
y[0] = 5
# The second auxiliary file has matching indices 1 and 3, but not 2
with ROOT.TFile(self.aux_file_2, "RECREATE"):
aux_tree_2 = ROOT.TTree(self.aux_tree_name_2, self.aux_tree_name_2)
idx = array.array("i", [0]) # any array can also be a numpy array
z = array.array("i", [0])
aux_tree_2.Branch("idx", idx, "idx/I")
aux_tree_2.Branch("z", z, "z/I")
idx[0] = 1
z[0] = 6
idx[0] = 3
z[0] = 7
def __enter__(self):
return self
def __exit__(self, *_):
os.remove(self.main_file)
os.remove(self.aux_file_1)
os.remove(self.aux_file_2)
def df037_TTreeEventMatching(dataset: DatasetContext):
# The input dataset has one main TTree and two auxiliary. The 'idx' branch
# is used as the index to match events between the trees.
# - The main tree has 3 entries, with 'idx' values(1, 2, 3).
# - The first auxiliary tree has 2 entries, with 'idx' values(1, 2).
# - The second auxiliary tree has 2 entries, with 'idx' values(1, 3).
# The two auxiliary trees are concatenated horizontally with the main one.
main_chain.AddFriend(aux_chain_1)
main_chain.AddFriend(aux_chain_2)
# Create an RDataFrame to process the input dataset. The DefaultValueFor and
# FilterAvailable functionalities can be used to decide what to do for
# the events that do not match entirely according to the index column 'idx'
df = ROOT.RDataFrame(main_chain)
aux_tree_1_colidx = dataset.aux_tree_name_1 + ".idx"
aux_tree_1_coly = dataset.aux_tree_name_1 + ".y"
aux_tree_2_colidx = dataset.aux_tree_name_2 + ".idx"
aux_tree_2_colz = dataset.aux_tree_name_2 + ".z"
default_value = ROOT.std.numeric_limits[int].min()
# Example 1: provide default values for all columns in case there was no
# match
display_1 = (
df.DefaultValueFor(aux_tree_1_colidx, default_value)
.DefaultValueFor(aux_tree_1_coly, default_value)
.DefaultValueFor(aux_tree_2_colidx, default_value)
.DefaultValueFor(aux_tree_2_colz, default_value)
.Display(("idx", aux_tree_1_colidx, aux_tree_2_colidx, "x", aux_tree_1_coly, aux_tree_2_colz))
)
# Example 2: skip the entire entry when there was no match for a column
# in the first auxiliary tree, but keep the entries when there is no match
# in the second auxiliary tree and provide a default value for those
display_2 = (
df.DefaultValueFor(aux_tree_2_colidx, default_value)
.DefaultValueFor(aux_tree_2_colz, default_value)
.FilterAvailable(aux_tree_1_coly)
.Display(("idx", aux_tree_1_colidx, aux_tree_2_colidx, "x", aux_tree_1_coly, aux_tree_2_colz))
)
# Example 3: Keep entries from the main tree for which there is no
# corresponding match in entries of the first auxiliary tree
display_3 = df.FilterMissing(aux_tree_1_colidx).Display(("idx", "x"))
print("Example 1: provide default values for all columns")
print("Example 2: always skip the entry when there is no match")
print("Example 3: keep entries from the main tree for which there is no match in the auxiliary tree")
if __name__ == "__main__":
with DatasetContext() as dataset:
df037_TTreeEventMatching(dataset)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Example 1: provide default values for all columns
+-----+-----+---------------+---------------+---+-------------+-------------+
| Row | idx | auxdata_1.idx | auxdata_2.idx | x | auxdata_1.y | auxdata_2.z |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 0 | 1 | 1 | 1 | 1 | 4 | 6 |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 1 | 2 | 2 | -2147483648 | 2 | 5 | -2147483648 |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 2 | 3 | -2147483648 | 3 | 3 | -2147483648 | 7 |
+-----+-----+---------------+---------------+---+-------------+-------------+
Example 2: always skip the entry when there is no match
+-----+-----+---------------+---------------+---+-------------+-------------+
| Row | idx | auxdata_1.idx | auxdata_2.idx | x | auxdata_1.y | auxdata_2.z |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 0 | 1 | 1 | 1 | 1 | 4 | 6 |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 1 | 2 | 2 | -2147483648 | 2 | 5 | -2147483648 |
+-----+-----+---------------+---------------+---+-------------+-------------+
Example 3: keep entries from the main tree for which there is no match in the auxiliary tree
+-----+-----+---+
| Row | idx | x |
+-----+-----+---+
| 2 | 3 | 3 |
+-----+-----+---+
Date
September 2024
Author
Vincenzo Eduardo Padulano (CERN)

Definition in file df037_TTreeEventMatching.py.