{ "cells": [ { "cell_type": "markdown", "id": "6221c36a", "metadata": {}, "source": [ "# rf404_categories\n", "Data and categories: working with RooCategory objects to describe discrete variables\n", "\n", "\n", "\n", "\n", "**Author:** Wouter Verkerke \n", "This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Tuesday, May 19, 2026 at 08:31 PM." ] }, { "cell_type": "code", "execution_count": 1, "id": "5b60baf1", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:41.441029Z", "iopub.status.busy": "2026-05-19T20:31:41.440921Z", "iopub.status.idle": "2026-05-19T20:31:41.455022Z", "shell.execute_reply": "2026-05-19T20:31:41.454518Z" } }, "outputs": [], "source": [ "%%cpp -d\n", "#include \"RooRealVar.h\"\n", "#include \"RooDataSet.h\"\n", "#include \"RooPolynomial.h\"\n", "#include \"RooCategory.h\"\n", "#include \"Roo1DTable.h\"\n", "#include \"RooGaussian.h\"\n", "#include \"TCanvas.h\"\n", "#include \"TAxis.h\"\n", "#include \"RooPlot.h\"\n", "#include <iostream>\n", "#include <memory>\n", "using namespace RooFit;" ] }, { "cell_type": "markdown", "id": "fbfa77fc", "metadata": {}, "source": [ "Construct a category with labels\n", "----------------------------------------------------------------" ] }, { "cell_type": "markdown", "id": "9bf027d5", "metadata": {}, "source": [ "Define a category with labels only" ] }, { "cell_type": "code", "execution_count": 2, "id": "5d5586e9", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:41.456690Z", "iopub.status.busy": "2026-05-19T20:31:41.456560Z", "iopub.status.idle": "2026-05-19T20:31:41.813254Z", "shell.execute_reply": "2026-05-19T20:31:41.812653Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RooCategory::tagCat = Lepton(idx = 0)\n", "\n" ] } ], "source": [ "RooCategory tagCat(\"tagCat\", \"Tagging category\");\n", "tagCat.defineType(\"Lepton\");\n", "tagCat.defineType(\"Kaon\");\n",
"// Register the two remaining tagger labels, then print the category's current state.\n", "for (const char *netTaggerLabel : {\"NetTagger-1\", \"NetTagger-2\"})\n", "   tagCat.defineType(netTaggerLabel);\n", "tagCat.Print();" ] }, { "cell_type": "markdown", "id": "928f99e2", "metadata": {}, "source": [ "Construct a category with labels and indices\n", "----------------------------------------------------------------------------------------" ] }, { "cell_type": "markdown", "id": "95ce7242", "metadata": {}, "source": [ "Define a category with explicitly numbered states" ] }, { "cell_type": "code", "execution_count": 3, "id": "06ac4811", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:41.814693Z", "iopub.status.busy": "2026-05-19T20:31:41.814559Z", "iopub.status.idle": "2026-05-19T20:31:42.025945Z", "shell.execute_reply": "2026-05-19T20:31:42.025365Z" } }, "outputs": [], "source": [ "// Assigning through operator[] gives each label its index; the verbose printout\n", "// of this category lists B0 with -1 and B0bar with 1.\n", "RooCategory b0flav(\"b0flav\", \"B0 flavour eigenstate\");\n", "b0flav[\"B0\"] = -1;\n", "b0flav[\"B0bar\"] = 1;" ] }, { "cell_type": "markdown", "id": "67b63a9b", "metadata": {}, "source": [ "Print it in \"verbose\" mode to see all states." ] }, { "cell_type": "code", "execution_count": 4, "id": "ac10d785", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:42.027446Z", "iopub.status.busy": "2026-05-19T20:31:42.027327Z", "iopub.status.idle": "2026-05-19T20:31:42.232675Z", "shell.execute_reply": "2026-05-19T20:31:42.232210Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--- RooAbsArg ---\n", " Value State: clean\n", " Shape State: clean\n", " Attributes: \n", " Address: 0x7f70162ca000\n", " Clients: \n", " Servers: \n", " Proxies: \n", "--- RooAbsCategory ---\n", " Value = -1 \"B0)\n", " Possible states:\n", " B0\t-1\n", " B0bar\t1\n" ] } ], "source": [ "b0flav.Print(\"V\");" ] }, { "cell_type": "markdown", "id": "200bb1ab", "metadata": {}, "source": [ "Alternatively, define many states at once. The function takes\n", "a map with std::string --> index mapping."
] }, { "cell_type": "code", "execution_count": 5, "id": "af8a1fe2", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:42.234152Z", "iopub.status.busy": "2026-05-19T20:31:42.234043Z", "iopub.status.idle": "2026-05-19T20:31:42.436261Z", "shell.execute_reply": "2026-05-19T20:31:42.435844Z" } }, "outputs": [], "source": [ "RooCategory largeCat(\"largeCat\", \"A category with many states\");\n", "largeCat.defineTypes({\n", " {\"A\", 0}, {\"b\", 2}, {\"c\", 8}, {\"dee\", 4},\n", " {\"F\", 133}, {\"g\", 15}, {\"H\", -20}\n", "});" ] }, { "cell_type": "markdown", "id": "5059d6ff", "metadata": {}, "source": [ "Iterate, query and set states\n", "--------------------------------------------------------" ] }, { "cell_type": "markdown", "id": "71de3e88", "metadata": {}, "source": [ "One can iterate through the {name,index} pairs of a category object" ] }, { "cell_type": "code", "execution_count": 6, "id": "ca9a0108", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:42.438335Z", "iopub.status.busy": "2026-05-19T20:31:42.438209Z", "iopub.status.idle": "2026-05-19T20:31:42.664047Z", "shell.execute_reply": "2026-05-19T20:31:42.655655Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "This is the for loop over states of 'largeCat':\n", "\tA\t0\n", "\tF\t133\n", "\tH\t-20\n", "\tb\t2\n", "\tc\t8\n", "\tdee\t4\n", "\tg\t15\n", "\n" ] } ], "source": [ "// Each element is a {name, index} pair: .first is the state label, .second its index.\n", "// In the recorded output the states appear sorted by label (uppercase before\n", "// lowercase), not in the order they were defined.\n", "std::cout << \"\\nThis is the for loop over states of 'largeCat':\";\n", "for (const auto& nameAndIdx : largeCat)\n", "   std::cout << \"\\n\\t\" << nameAndIdx.first << \"\\t\" << nameAndIdx.second;\n", "std::cout << '\\n' << std::endl;" ] }, { "cell_type": "markdown", "id": "17e148db", "metadata": {}, "source": [ "To ask whether a state is valid use:" ] }, { "cell_type": "code", "execution_count": 7, "id": "c477a6ce", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:42.665789Z",
"iopub.status.busy": "2026-05-19T20:31:42.665642Z", "iopub.status.idle": "2026-05-19T20:31:42.873857Z", "shell.execute_reply": "2026-05-19T20:31:42.873310Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Has label 'A': 1\n", "Has index '-20': 1" ] } ], "source": [ "// Both validity checks are streamed in one expression; no trailing newline is written.\n", "std::cout << \"Has label 'A': \" << largeCat.hasLabel(\"A\")\n", "          << \"\\nHas index '-20': \" << largeCat.hasIndex(-20);" ] }, { "cell_type": "markdown", "id": "dbdd0bc0", "metadata": {}, "source": [ "To retrieve names or state numbers:" ] }, { "cell_type": "code", "execution_count": 8, "id": "ab5e7da0", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:42.875380Z", "iopub.status.busy": "2026-05-19T20:31:42.875267Z", "iopub.status.idle": "2026-05-19T20:31:43.094137Z", "shell.execute_reply": "2026-05-19T20:31:43.081382Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Label corresponding to '2' is b\n", "Index corresponding to 'A' is 0" ] } ], "source": [ "// Translate in both directions: index -> label, then label -> index.\n", "std::cout << \"\\nLabel corresponding to '2' is \" << largeCat.lookupName(2)\n", "          << \"\\nIndex corresponding to 'A' is \" << largeCat.lookupIndex(\"A\");" ] }, { "cell_type": "markdown", "id": "526361f5", "metadata": {}, "source": [ "To get the current state:" ] }, { "cell_type": "code", "execution_count": 9, "id": "d926543a", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:43.096365Z", "iopub.status.busy": "2026-05-19T20:31:43.096231Z", "iopub.status.idle": "2026-05-19T20:31:43.306340Z", "shell.execute_reply": "2026-05-19T20:31:43.305842Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Current index is 0\n", "Current label is A\n" ] } ], "source": [ "// Report the current state as index and label, then end the line and flush.\n", "std::cout << \"\\nCurrent index is \" << largeCat.getCurrentIndex()\n", "          << \"\\nCurrent label is \" << largeCat.getCurrentLabel()\n", "          << std::endl;" ] }, { "cell_type": "markdown", "id":
"c0f83f33", "metadata": {}, "source": [ "To set the state, use one of the two:" ] }, { "cell_type": "code", "execution_count": 10, "id": "3db24b0c", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:43.307852Z", "iopub.status.busy": "2026-05-19T20:31:43.307738Z", "iopub.status.idle": "2026-05-19T20:31:43.513048Z", "shell.execute_reply": "2026-05-19T20:31:43.512390Z" } }, "outputs": [], "source": [ "largeCat.setIndex(8);\n", "largeCat.setLabel(\"c\");" ] }, { "cell_type": "markdown", "id": "ee078bfd", "metadata": {}, "source": [ "Generate dummy data for tabulation demo\n", "----------------------------------------------------------------------------" ] }, { "cell_type": "markdown", "id": "0020ae8e", "metadata": {}, "source": [ "Generate a dummy dataset" ] }, { "cell_type": "code", "execution_count": 11, "id": "be9da4ef", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:43.515103Z", "iopub.status.busy": "2026-05-19T20:31:43.514988Z", "iopub.status.idle": "2026-05-19T20:31:43.722947Z", "shell.execute_reply": "2026-05-19T20:31:43.722656Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_58:3:1: warning: 'data' shadows a declaration with the same name in the 'std' namespace; use '::data' to reference this declaration\n", "std::unique_ptr data{RooPolynomial(\"p\", \"p\", x).generate({x, b0flav, tagCat}, 10000)};\n", "^\n" ] } ], "source": [ "RooRealVar x(\"x\", \"x\", 0, 10);\n", "std::unique_ptr data{RooPolynomial(\"p\", \"p\", x).generate({x, b0flav, tagCat}, 10000)};" ] }, { "cell_type": "markdown", "id": "c3d25817", "metadata": {}, "source": [ "Print tables of category contents of datasets\n", "------------------------------------------------------------------------------------------" ] }, { "cell_type": "markdown", "id": "7c3b5dbb", "metadata": {}, "source": [ "Tables are equivalent of plots for categories" ] }, { "cell_type": "code", 
"execution_count": 12, "id": "a5166e06", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:43.724637Z", "iopub.status.busy": "2026-05-19T20:31:43.724501Z", "iopub.status.idle": "2026-05-19T20:31:43.930675Z", "shell.execute_reply": "2026-05-19T20:31:43.930124Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_59:2:23: error: reference to 'data' is ambiguous\n", " Roo1DTable *btable = data->table(b0flav);\n", " ^\n", "input_line_58:3:29: note: candidate found by name lookup is 'data'\n", "std::unique_ptr data{RooPolynomial(\"p\", \"p\", x).generate({x, b0flav, tagCat}, 10000)};\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:344:5: note: candidate found by name lookup is 'std::data'\n", " data(initializer_list<_Tp> __il) noexcept\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:312:5: note: candidate found by name lookup is 'std::data'\n", " data(_Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:323:5: note: candidate found by name lookup is 'std::data'\n", " data(const _Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:334:5: note: candidate found by name lookup is 'std::data'\n", " data(_Tp (&__array)[_Nm]) noexcept\n", " ^\n" ] } ], "source": [ "Roo1DTable *btable = data->table(b0flav);\n", "btable->Print();\n", "btable->Print(\"v\");" ] }, { "cell_type": "markdown", "id": "94dafa54", "metadata": {}, "source": [ "Create table for subset of events matching cut expression" ] }, { "cell_type": "code", "execution_count": 13, "id": "fd090c44", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:43.932338Z", "iopub.status.busy": "2026-05-19T20:31:43.932225Z", 
"iopub.status.idle": "2026-05-19T20:31:44.138385Z", "shell.execute_reply": "2026-05-19T20:31:44.137862Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_60:2:23: error: reference to 'data' is ambiguous\n", " Roo1DTable *ttable = data->table(tagCat, \"x>8.23\");\n", " ^\n", "input_line_58:3:29: note: candidate found by name lookup is 'data'\n", "std::unique_ptr data{RooPolynomial(\"p\", \"p\", x).generate({x, b0flav, tagCat}, 10000)};\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:344:5: note: candidate found by name lookup is 'std::data'\n", " data(initializer_list<_Tp> __il) noexcept\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:312:5: note: candidate found by name lookup is 'std::data'\n", " data(_Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:323:5: note: candidate found by name lookup is 'std::data'\n", " data(const _Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:334:5: note: candidate found by name lookup is 'std::data'\n", " data(_Tp (&__array)[_Nm]) noexcept\n", " ^\n" ] } ], "source": [ "Roo1DTable *ttable = data->table(tagCat, \"x>8.23\");\n", "ttable->Print();\n", "ttable->Print(\"v\");" ] }, { "cell_type": "markdown", "id": "a705ad8a", "metadata": {}, "source": [ "Create table for all (tagCat x b0flav) state combinations" ] }, { "cell_type": "code", "execution_count": 14, "id": "329e09b2", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:44.139903Z", "iopub.status.busy": "2026-05-19T20:31:44.139794Z", "iopub.status.idle": "2026-05-19T20:31:44.346068Z", "shell.execute_reply": "2026-05-19T20:31:44.345502Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ 
"input_line_61:2:24: error: reference to 'data' is ambiguous\n", " Roo1DTable *bttable = data->table(RooArgSet(tagCat, b0flav));\n", " ^\n", "input_line_58:3:29: note: candidate found by name lookup is 'data'\n", "std::unique_ptr data{RooPolynomial(\"p\", \"p\", x).generate({x, b0flav, tagCat}, 10000)};\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:344:5: note: candidate found by name lookup is 'std::data'\n", " data(initializer_list<_Tp> __il) noexcept\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:312:5: note: candidate found by name lookup is 'std::data'\n", " data(_Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:323:5: note: candidate found by name lookup is 'std::data'\n", " data(const _Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:334:5: note: candidate found by name lookup is 'std::data'\n", " data(_Tp (&__array)[_Nm]) noexcept\n", " ^\n" ] } ], "source": [ "Roo1DTable *bttable = data->table(RooArgSet(tagCat, b0flav));\n", "bttable->Print(\"v\");" ] }, { "cell_type": "markdown", "id": "4afb39ef", "metadata": {}, "source": [ "Retrieve number of events from table\n", "Number can be non-integer if source dataset has weighed events" ] }, { "cell_type": "code", "execution_count": 15, "id": "94aac0ff", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:44.347756Z", "iopub.status.busy": "2026-05-19T20:31:44.347641Z", "iopub.status.idle": "2026-05-19T20:31:44.578748Z", "shell.execute_reply": "2026-05-19T20:31:44.578147Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_63:2:3: error: use of undeclared identifier 'btable'\n", " (btable->get(\"B0\"))\n", " ^\n", "Error in : Error evaluating 
expression (btable->get(\"B0\"))\n", "Execution of your code was aborted.\n" ] } ], "source": [ "double nb0 = btable->get(\"B0\");\n", "std::cout << \"Number of events with B0 flavor is \" << nb0 << std::endl;" ] }, { "cell_type": "markdown", "id": "aec8671d", "metadata": {}, "source": [ "Retrieve fraction of events with \"Lepton\" tag" ] }, { "cell_type": "code", "execution_count": 16, "id": "7d373c1c", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:44.580331Z", "iopub.status.busy": "2026-05-19T20:31:44.580214Z", "iopub.status.idle": "2026-05-19T20:31:44.788428Z", "shell.execute_reply": "2026-05-19T20:31:44.787852Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_65:2:3: error: use of undeclared identifier 'ttable'\n", " (ttable->getFrac(\"Lepton\"))\n", " ^\n", "Error in : Error evaluating expression (ttable->getFrac(\"Lepton\"))\n", "Execution of your code was aborted.\n" ] } ], "source": [ "double fracLep = ttable->getFrac(\"Lepton\");\n", "std::cout << \"Fraction of events tagged with Lepton tag is \" << fracLep << std::endl;" ] }, { "cell_type": "markdown", "id": "b93b9f5b", "metadata": {}, "source": [ "Defining ranges for plotting, fitting on categories\n", "------------------------------------------------------------------------------------------------------" ] }, { "cell_type": "markdown", "id": "1ab4993d", "metadata": {}, "source": [ "Define named range as comma separated list of labels" ] }, { "cell_type": "code", "execution_count": 17, "id": "2c8a34aa", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:44.789753Z", "iopub.status.busy": "2026-05-19T20:31:44.789641Z", "iopub.status.idle": "2026-05-19T20:31:44.994919Z", "shell.execute_reply": "2026-05-19T20:31:44.994457Z" } }, "outputs": [], "source": [ "tagCat.setRange(\"good\", \"Lepton,Kaon\");" ] }, { "cell_type": "markdown", "id": "2ea043c0", "metadata": {}, "source": [ "Or add state 
names one by one" ] }, { "cell_type": "code", "execution_count": 18, "id": "2fe65abb", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:45.016625Z", "iopub.status.busy": "2026-05-19T20:31:45.016467Z", "iopub.status.idle": "2026-05-19T20:31:45.221933Z", "shell.execute_reply": "2026-05-19T20:31:45.221269Z" } }, "outputs": [], "source": [ "tagCat.addToRange(\"soso\", \"NetTagger-1\");\n", "tagCat.addToRange(\"soso\", \"NetTagger-2\");" ] }, { "cell_type": "markdown", "id": "aba6e60d", "metadata": {}, "source": [ "Use category range in dataset reduction specification" ] }, { "cell_type": "code", "execution_count": 19, "id": "16cbeede", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:31:45.223716Z", "iopub.status.busy": "2026-05-19T20:31:45.223582Z", "iopub.status.idle": "2026-05-19T20:31:45.429766Z", "shell.execute_reply": "2026-05-19T20:31:45.429194Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_68:2:39: error: reference to 'data' is ambiguous\n", " std::unique_ptr goodData{data->reduce(CutRange(\"good\"))};\n", " ^\n", "input_line_58:3:29: note: candidate found by name lookup is 'data'\n", "std::unique_ptr data{RooPolynomial(\"p\", \"p\", x).generate({x, b0flav, tagCat}, 10000)};\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:344:5: note: candidate found by name lookup is 'std::data'\n", " data(initializer_list<_Tp> __il) noexcept\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:312:5: note: candidate found by name lookup is 'std::data'\n", " data(_Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:323:5: note: candidate found by name lookup is 'std::data'\n", " data(const _Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", 
"/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:334:5: note: candidate found by name lookup is 'std::data'\n", " data(_Tp (&__array)[_Nm]) noexcept\n", " ^\n" ] } ], "source": [ "std::unique_ptr goodData{data->reduce(CutRange(\"good\"))};\n", "static_cast(*goodData).table(tagCat)->Print(\"v\");" ] } ], "metadata": { "kernelspec": { "display_name": "ROOT C++", "language": "c++", "name": "root" }, "language_info": { "codemirror_mode": "text/x-c++src", "file_extension": ".C", "mimetype": " text/x-c++src", "name": "c++" } }, "nbformat": 4, "nbformat_minor": 5 }