{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "90329eb6",
   "metadata": {},
   "source": [
    "# df037_TTreeEventMatching\n",
    "\n",
    "This example shows how to process a TTree-based dataset with horizontal\n",
    "concatenations (friends) and event matching (based on TTreeIndex). When the\n",
    "event currently being processed has no match in one (or more) of the friend\n",
    "datasets, the `DefaultValueFor`, `FilterAvailable` and `FilterMissing`\n",
    "functionalities of RDataFrame can be used to decide how to handle it.\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "**Author:** Vincenzo Eduardo Padulano (CERN)  \n",
    "<i><small>This notebook tutorial was automatically generated with <a href= \"https://github.com/root-project/root/blob/master/documentation/doxygen/converttonotebook.py\">ROOTBOOK-izer</a> from the macro found in the ROOT repository  on Tuesday, May 19, 2026 at 08:10 PM.</small></i>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e8a850c0",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:10:20.877667Z",
     "iopub.status.busy": "2026-05-19T20:10:20.877524Z",
     "iopub.status.idle": "2026-05-19T20:10:20.882083Z",
     "shell.execute_reply": "2026-05-19T20:10:20.881316Z"
    }
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "#include <TChain.h>\n",
    "#include <TFile.h>\n",
    "#include <TTree.h>\n",
    "#include <TTreeIndex.h>\n",
    "\n",
    "#include <ROOT/RDataFrame.hxx>\n",
    "\n",
    "#include <cstdio>\n",
    "#include <iostream>\n",
    "#include <limits>\n",
    "#include <numeric>\n",
    "#include <string>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "66568d31",
   "metadata": {},
   "source": [
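    "A helper struct that creates the dataset for the tutorial below: its constructor\n",
    "writes the main file and the two auxiliary files, and its destructor removes them."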
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d2687c21",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:10:20.883419Z",
     "iopub.status.busy": "2026-05-19T20:10:20.883301Z",
     "iopub.status.idle": "2026-05-19T20:10:21.276708Z",
     "shell.execute_reply": "2026-05-19T20:10:21.258746Z"
    }
   },
   "outputs": [],
   "source": [
    "struct Dataset {\n",
    "\n",
    "   constexpr static auto fMainFile{\"df037_TTreeEventMatching_C_main.root\"};\n",
    "   constexpr static auto fAuxFile1{\"df037_TTreeEventMatching_C_aux_1.root\"};\n",
    "   constexpr static auto fAuxFile2{\"df037_TTreeEventMatching_C_aux_2.root\"};\n",
    "   constexpr static auto fMainTreeName{\"events\"};\n",
    "   constexpr static auto fAuxTreeName1{\"auxdata_1\"};\n",
    "   constexpr static auto fAuxTreeName2{\"auxdata_2\"};\n",
    "\n",
    "   Dataset()\n",
    "   {\n",
    "      {\n",
    "         TFile f(fMainFile, \"RECREATE\");\n",
    "         TTree mainTree(fMainTreeName, fMainTreeName);\n",
    "         int idx;\n",
    "         int x;\n",
    "         mainTree.Branch(\"idx\", &idx, \"idx/I\");\n",
    "         mainTree.Branch(\"x\", &x, \"x/I\");\n",
    "\n",
    "         idx = 1;\n",
    "         x = 1;\n",
    "         mainTree.Fill();\n",
    "         idx = 2;\n",
    "         x = 2;\n",
    "         mainTree.Fill();\n",
    "         idx = 3;\n",
    "         x = 3;\n",
    "         mainTree.Fill();\n",
    "\n",
    "         mainTree.Write();\n",
    "      }\n",
    "      {\n",
    "         // The first auxiliary file has matching indices 1 and 2, but not 3\n",
    "         TFile f(fAuxFile1, \"RECREATE\");\n",
    "         TTree auxTree(fAuxTreeName1, fAuxTreeName1);\n",
    "         int y;\n",
    "         int idx;\n",
    "         auxTree.Branch(\"idx\", &idx, \"idx/I\");\n",
    "         auxTree.Branch(\"y\", &y, \"y/I\");\n",
    "\n",
    "         idx = 1;\n",
    "         y = 4;\n",
    "         auxTree.Fill();\n",
    "         idx = 2;\n",
    "         y = 5;\n",
    "         auxTree.Fill();\n",
    "\n",
    "         auxTree.Write();\n",
    "      }\n",
    "      {\n",
    "         // The second auxiliary file has matching indices 1 and 3, but not 2\n",
    "         TFile f(fAuxFile2, \"RECREATE\");\n",
    "         TTree auxTree(fAuxTreeName2, fAuxTreeName2);\n",
    "         int z;\n",
    "         int idx;\n",
    "         auxTree.Branch(\"idx\", &idx, \"idx/I\");\n",
    "         auxTree.Branch(\"z\", &z, \"z/I\");\n",
    "\n",
    "         idx = 1;\n",
    "         z = 6;\n",
    "         auxTree.Fill();\n",
    "         idx = 3;\n",
    "         z = 7;\n",
    "         auxTree.Fill();\n",
    "\n",
    "         auxTree.Write();\n",
    "      }\n",
    "   }\n",
    "\n",
    "   ~Dataset()\n",
    "   {\n",
    "      std::remove(fMainFile);\n",
    "      std::remove(fAuxFile1);\n",
    "      std::remove(fAuxFile2);\n",
    "   }\n",
    "};"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f9bf5bbf",
   "metadata": {},
   "source": [
    "Create the dataset: one main TTree and two auxiliary ones. The 'idx' branch\n",
    "is used as the index to match events between the trees.\n",
    "\n",
    "- The main tree has 3 entries, with 'idx' values (1, 2, 3).\n",
    "- The first auxiliary tree has 2 entries, with 'idx' values (1, 2).\n",
    "- The second auxiliary tree has 2 entries, with 'idx' values (1, 3).\n",
    "\n",
    "The two auxiliary trees are indexed on 'idx' and concatenated horizontally\n",
    "(added as friends) with the main one."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "7667308a",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:10:21.286326Z",
     "iopub.status.busy": "2026-05-19T20:10:21.286158Z",
     "iopub.status.idle": "2026-05-19T20:10:21.489153Z",
     "shell.execute_reply": "2026-05-19T20:10:21.488708Z"
    }
   },
   "outputs": [],
   "source": [
    "Dataset dataset{};\n",
    "TChain mainChain{dataset.fMainTreeName};\n",
    "mainChain.Add(dataset.fMainFile);\n",
    "\n",
    "TChain auxChain1(dataset.fAuxTreeName1);\n",
    "auxChain1.Add(dataset.fAuxFile1);\n",
    "auxChain1.BuildIndex(\"idx\");\n",
    "\n",
    "TChain auxChain2(dataset.fAuxTreeName2);\n",
    "auxChain2.Add(dataset.fAuxFile2);\n",
    "auxChain2.BuildIndex(\"idx\");\n",
    "\n",
    "mainChain.AddFriend(&auxChain1);\n",
    "mainChain.AddFriend(&auxChain2);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a292e73c",
   "metadata": {},
   "source": [
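    "Create an RDataFrame to process the input dataset. The `DefaultValueFor`,\n",
    "`FilterAvailable` and `FilterMissing` functionalities can be used to decide what\n",
    "to do with the events that do not match entirely according to the index column\n",
    "'idx'. Columns of the friend trees are referred to as `<tree name>.<branch name>`,\n",
    "and the smallest representable `int` is used as the default value for missing entries."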
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a377ed45",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:10:21.499170Z",
     "iopub.status.busy": "2026-05-19T20:10:21.499046Z",
     "iopub.status.idle": "2026-05-19T20:10:21.702469Z",
     "shell.execute_reply": "2026-05-19T20:10:21.701528Z"
    }
   },
   "outputs": [],
   "source": [
    "ROOT::RDataFrame df{mainChain};\n",
    "\n",
    "const std::string auxTree1ColIdx = std::string(dataset.fAuxTreeName1) + \".idx\";\n",
    "const std::string auxTree1ColY = std::string(dataset.fAuxTreeName1) + \".y\";\n",
    "const std::string auxTree2ColIdx = std::string(dataset.fAuxTreeName2) + \".idx\";\n",
    "const std::string auxTree2ColZ = std::string(dataset.fAuxTreeName2) + \".z\";\n",
    "\n",
    "constexpr static auto defaultValue = std::numeric_limits<int>::min();"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d8015770",
   "metadata": {},
   "source": [
    "Example 1: provide a default value for every auxiliary column, to be used\n",
    "whenever there is no match."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "19ec2894",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:10:21.703888Z",
     "iopub.status.busy": "2026-05-19T20:10:21.703770Z",
     "iopub.status.idle": "2026-05-19T20:10:23.177676Z",
     "shell.execute_reply": "2026-05-19T20:10:23.158649Z"
    }
   },
   "outputs": [],
   "source": [
    "auto display1 = df.DefaultValueFor(auxTree1ColIdx, defaultValue)\n",
    "                   .DefaultValueFor(auxTree1ColY, defaultValue)\n",
    "                   .DefaultValueFor(auxTree2ColIdx, defaultValue)\n",
    "                   .DefaultValueFor(auxTree2ColZ, defaultValue)\n",
    "                   .Display<int, int, int, int, int, int>(\n",
    "                      {\"idx\", auxTree1ColIdx, auxTree2ColIdx, \"x\", auxTree1ColY, auxTree2ColZ});"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a21dde56",
   "metadata": {},
   "source": [
    "Example 2: skip the entire entry when there is no match in the first\n",
    "auxiliary tree, but keep the entries that have no match in the second\n",
    "auxiliary tree and provide a default value for its columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a8581a18",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:10:23.210155Z",
     "iopub.status.busy": "2026-05-19T20:10:23.209979Z",
     "iopub.status.idle": "2026-05-19T20:10:24.153192Z",
     "shell.execute_reply": "2026-05-19T20:10:24.152756Z"
    }
   },
   "outputs": [],
   "source": [
    "auto display2 = df.DefaultValueFor(auxTree2ColIdx, defaultValue)\n",
    "                   .DefaultValueFor(auxTree2ColZ, defaultValue)\n",
    "                   .FilterAvailable(auxTree1ColY)\n",
    "                   .Display<int, int, int, int, int, int>(\n",
    "                      {\"idx\", auxTree1ColIdx, auxTree2ColIdx, \"x\", auxTree1ColY, auxTree2ColZ});"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "35d9c89c",
   "metadata": {},
   "source": [
    "Example 3: keep only the entries of the main tree that have no\n",
    "corresponding match in the first auxiliary tree. The results of all three\n",
    "examples are then printed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b44a8b43",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:10:24.159720Z",
     "iopub.status.busy": "2026-05-19T20:10:24.159553Z",
     "iopub.status.idle": "2026-05-19T20:10:24.836060Z",
     "shell.execute_reply": "2026-05-19T20:10:24.830413Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Example 1: provide default values for all columns\n",
      "+-----+-----+---------------+---------------+---+-------------+-------------+\n",
      "| Row | idx | auxdata_1.idx | auxdata_2.idx | x | auxdata_1.y | auxdata_2.z | \n",
      "+-----+-----+---------------+---------------+---+-------------+-------------+\n",
      "| 0   | 1   | 1             | 1             | 1 | 4           | 6           | \n",
      "+-----+-----+---------------+---------------+---+-------------+-------------+\n",
      "| 1   | 2   | 2             | -2147483648   | 2 | 5           | -2147483648 | \n",
      "+-----+-----+---------------+---------------+---+-------------+-------------+\n",
      "| 2   | 3   | -2147483648   | 3             | 3 | -2147483648 | 7           | \n",
      "+-----+-----+---------------+---------------+---+-------------+-------------+\n",
      "Example 2: skip the entry only when the first auxiliary tree does not match\n",
      "+-----+-----+---------------+---------------+---+-------------+-------------+\n",
      "| Row | idx | auxdata_1.idx | auxdata_2.idx | x | auxdata_1.y | auxdata_2.z | \n",
      "+-----+-----+---------------+---------------+---+-------------+-------------+\n",
      "| 0   | 1   | 1             | 1             | 1 | 4           | 6           | \n",
      "+-----+-----+---------------+---------------+---+-------------+-------------+\n",
      "| 1   | 2   | 2             | -2147483648   | 2 | 5           | -2147483648 | \n",
      "+-----+-----+---------------+---------------+---+-------------+-------------+\n",
      "Example 3: keep entries from the main tree for which there is no match in the auxiliary tree\n",
      "+-----+-----+---+\n",
      "| Row | idx | x | \n",
      "+-----+-----+---+\n",
      "| 2   | 3   | 3 | \n",
      "+-----+-----+---+\n"
     ]
    }
   ],
   "source": [
    "auto display3 = df.FilterMissing(auxTree1ColIdx).Display<int, int>({\"idx\", \"x\"});\n",
    "\n",
    "std::cout << \"Example 1: provide default values for all columns\\n\";\n",
    "display1->Print();\n",
    "std::cout << \"Example 2: skip the entry only when the first auxiliary tree does not match\\n\";\n",
    "display2->Print();\n",
    "std::cout << \"Example 3: keep entries from the main tree for which there is no match in the auxiliary tree\\n\";\n",
    "display3->Print();"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ROOT C++",
   "language": "c++",
   "name": "root"
  },
  "language_info": {
   "codemirror_mode": "text/x-c++src",
   "file_extension": ".C",
   "mimetype": " text/x-c++src",
   "name": "c++"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}