{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a511d585",
   "metadata": {},
   "source": [
    "# df037_TTreeEventMatching\n",
    "\n",
    "This example shows processing of a TTree-based dataset with horizontal\n",
    "concatenations (friends) and event matching (based on TTreeIndex). In case\n",
    "the current event being processed does not match one (or more) of the friend\n",
    "datasets, one can use the FilterAvailable and DefaultValueFor functionalities\n",
    "to act upon the situation.\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "**Author:** Vincenzo Eduardo Padulano (CERN)  \n",
    "<i><small>This notebook tutorial was automatically generated with <a href= \"https://github.com/root-project/root/blob/master/documentation/doxygen/converttonotebook.py\">ROOTBOOK-izer</a> from the macro found in the ROOT repository  on Tuesday, May 19, 2026 at 08:10 PM.</small></i>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6872561",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import array\n",
    "import os\n",
    "\n",
    "import ROOT\n",
    "\n",
    "\n",
    "class DatasetContext:\n",
    "    \"\"\"A helper class to create the dataset for the tutorial below.\"\"\"\n",
    "\n",
    "    main_file = \"df037_TTreeEventMatching_py_main.root\"\n",
    "    aux_file_1 = \"df037_TTreeEventMatching_py_aux_1.root\"\n",
    "    aux_file_2 = \"df037_TTreeEventMatching_py_aux_2.root\"\n",
    "    main_tree_name = \"events\"\n",
    "    aux_tree_name_1 = \"auxdata_1\"\n",
    "    aux_tree_name_2 = \"auxdata_2\"\n",
    "\n",
    "    def __init__(self):\n",
    "        with ROOT.TFile(self.main_file, \"RECREATE\"):\n",
    "            main_tree = ROOT.TTree(self.main_tree_name, self.main_tree_name)\n",
    "            idx = array.array(\"i\", [0])  # any array can also be a numpy array\n",
    "            x = array.array(\"i\", [0])\n",
    "            main_tree.Branch(\"idx\", idx, \"idx/I\")\n",
    "            main_tree.Branch(\"x\", x, \"x/I\")\n",
    "\n",
    "            idx[0] = 1\n",
    "            x[0] = 1\n",
    "            main_tree.Fill()\n",
    "            idx[0] = 2\n",
    "            x[0] = 2\n",
    "            main_tree.Fill()\n",
    "            idx[0] = 3\n",
    "            x[0] = 3\n",
    "            main_tree.Fill()\n",
    "\n",
    "            main_tree.Write()\n",
    "\n",
    "        # The first auxiliary file has matching indices 1 and 2, but not 3\n",
    "        with ROOT.TFile(self.aux_file_1, \"RECREATE\"):\n",
    "            aux_tree_1 = ROOT.TTree(self.aux_tree_name_1, self.aux_tree_name_1)\n",
    "            idx = array.array(\"i\", [0])  # any array can also be a numpy array\n",
    "            y = array.array(\"i\", [0])\n",
    "            aux_tree_1.Branch(\"idx\", idx, \"idx/I\")\n",
    "            aux_tree_1.Branch(\"y\", y, \"y/I\")\n",
    "\n",
    "            idx[0] = 1\n",
    "            y[0] = 4\n",
    "            aux_tree_1.Fill()\n",
    "            idx[0] = 2\n",
    "            y[0] = 5\n",
    "            aux_tree_1.Fill()\n",
    "\n",
    "            aux_tree_1.Write()\n",
    "\n",
    "        # The second auxiliary file has matching indices 1 and 3, but not 2\n",
    "        with ROOT.TFile(self.aux_file_2, \"RECREATE\"):\n",
    "            aux_tree_2 = ROOT.TTree(self.aux_tree_name_2, self.aux_tree_name_2)\n",
    "            idx = array.array(\"i\", [0])  # any array can also be a numpy array\n",
    "            z = array.array(\"i\", [0])\n",
    "            aux_tree_2.Branch(\"idx\", idx, \"idx/I\")\n",
    "            aux_tree_2.Branch(\"z\", z, \"z/I\")\n",
    "\n",
    "            idx[0] = 1\n",
    "            z[0] = 6\n",
    "            aux_tree_2.Fill()\n",
    "            idx[0] = 3\n",
    "            z[0] = 7\n",
    "            aux_tree_2.Fill()\n",
    "\n",
    "            aux_tree_2.Write()\n",
    "\n",
    "    def __enter__(self):\n",
    "        return self\n",
    "\n",
    "    def __exit__(self, *_):\n",
    "        os.remove(self.main_file)\n",
    "        os.remove(self.aux_file_1)\n",
    "        os.remove(self.aux_file_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "02330bed",
   "metadata": {},
   "source": [
    "The input dataset has one main TTree and two auxiliary. The 'idx' branch\n",
    "is used as the index to match events between the trees.\n",
    "- The main tree has 3 entries, with 'idx' values(1, 2, 3).\n",
    "- The first auxiliary tree has 2 entries, with 'idx' values(1, 2).\n",
    "- The second auxiliary tree has 2 entries, with 'idx' values(1, 3).\n",
    "The two auxiliary trees are concatenated horizontally with the main one."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df7f98cd",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "main_chain = ROOT.TChain(dataset.main_tree_name)\n",
    "main_chain.Add(dataset.main_file)\n",
    "\n",
    "aux_chain_1 = ROOT.TChain(dataset.aux_tree_name_1)\n",
    "aux_chain_1.Add(dataset.aux_file_1)\n",
    "aux_chain_1.BuildIndex(\"idx\")\n",
    "\n",
    "aux_chain_2 = ROOT.TChain(dataset.aux_tree_name_2)\n",
    "aux_chain_2.Add(dataset.aux_file_2)\n",
    "aux_chain_2.BuildIndex(\"idx\")\n",
    "\n",
    "main_chain.AddFriend(aux_chain_1)\n",
    "main_chain.AddFriend(aux_chain_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6856af70",
   "metadata": {},
   "source": [
    "Create an RDataFrame to process the input dataset. The DefaultValueFor and\n",
    "FilterAvailable functionalities can be used to decide what to do for\n",
    "the events that do not match entirely according to the index column 'idx'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb5b0d58",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df = ROOT.RDataFrame(main_chain)\n",
    "\n",
    "aux_tree_1_colidx = dataset.aux_tree_name_1 + \".idx\"\n",
    "aux_tree_1_coly = dataset.aux_tree_name_1 + \".y\"\n",
    "aux_tree_2_colidx = dataset.aux_tree_name_2 + \".idx\"\n",
    "aux_tree_2_colz = dataset.aux_tree_name_2 + \".z\"\n",
    "\n",
    "default_value = ROOT.std.numeric_limits[int].min()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b5bc5332",
   "metadata": {},
   "source": [
    "Example 1: provide default values for all columns in case there was no\n",
    "match"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0cff288",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "display_1 = (\n",
    "    df.DefaultValueFor(aux_tree_1_colidx, default_value)\n",
    "    .DefaultValueFor(aux_tree_1_coly, default_value)\n",
    "    .DefaultValueFor(aux_tree_2_colidx, default_value)\n",
    "    .DefaultValueFor(aux_tree_2_colz, default_value)\n",
    "    .Display((\"idx\", aux_tree_1_colidx, aux_tree_2_colidx, \"x\", aux_tree_1_coly, aux_tree_2_colz))\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "19bdc6a8",
   "metadata": {},
   "source": [
    "Example 2: skip the entire entry when there was no match for a column\n",
    "in the first auxiliary tree, but keep the entries when there is no match\n",
    "in the second auxiliary tree and provide a default value for those"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ac3ed21",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "display_2 = (\n",
    "    df.DefaultValueFor(aux_tree_2_colidx, default_value)\n",
    "    .DefaultValueFor(aux_tree_2_colz, default_value)\n",
    "    .FilterAvailable(aux_tree_1_coly)\n",
    "    .Display((\"idx\", aux_tree_1_colidx, aux_tree_2_colidx, \"x\", aux_tree_1_coly, aux_tree_2_colz))\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dc507f84",
   "metadata": {},
   "source": [
    "Example 3: Keep entries from the main tree for which there is no\n",
    "corresponding match in entries of the first auxiliary tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47eb2c28",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "display_3 = df.FilterMissing(aux_tree_1_colidx).Display((\"idx\", \"x\"))\n",
    "\n",
    "print(\"Example 1: provide default values for all columns\")\n",
    "display_1.Print()\n",
    "print(\"Example 2: always skip the entry when there is no match\")\n",
    "display_2.Print()\n",
    "print(\"Example 3: keep entries from the main tree for which there is no match in the auxiliary tree\")\n",
    "display_3.Print()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
