{ "cells": [ { "cell_type": "markdown", "id": "f469ec00", "metadata": {}, "source": [ "# TMVARegressionApplication\n", "This macro provides a simple example on how to use the trained regression MVAs\n", "within an analysis module\n", "\n", " - Project : TMVA - a Root-integrated toolkit for multivariate data analysis\n", " - Package : TMVA\n", " - Executable: TMVARegressionApplication\n", "\n", "\n", "\n", "**Author:** Andreas Hoecker \n", "This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Tuesday, May 19, 2026 at 08:24 PM." ] }, { "cell_type": "code", "execution_count": 1, "id": "af4b5da2", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:17.479561Z", "iopub.status.busy": "2026-05-19T20:24:17.479441Z", "iopub.status.idle": "2026-05-19T20:24:17.490512Z", "shell.execute_reply": "2026-05-19T20:24:17.490052Z" } }, "outputs": [], "source": [ "%%cpp -d\n", "#include \n", "#include \n", "#include \n", "#include \n", "#include \n", "\n", "#include \"TFile.h\"\n", "#include \"TTree.h\"\n", "#include \"TString.h\"\n", "#include \"TSystem.h\"\n", "#include \"TROOT.h\"\n", "#include \"TStopwatch.h\"\n", "\n", "#include \"TMVA/Tools.h\"\n", "#include \"TMVA/Reader.h\"\n", "\n", "using namespace TMVA;" ] }, { "cell_type": "markdown", "id": "b0f6b05b", "metadata": {}, "source": [ " Arguments are defined. 
" ] }, { "cell_type": "code", "execution_count": 2, "id": "5deb6837", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:17.492262Z", "iopub.status.busy": "2026-05-19T20:24:17.492146Z", "iopub.status.idle": "2026-05-19T20:24:17.802922Z", "shell.execute_reply": "2026-05-19T20:24:17.802427Z" } }, "outputs": [], "source": [ "TString myMethodList = \"\";" ] }, { "cell_type": "markdown", "id": "63ce2dba", "metadata": {}, "source": [ "---------------------------------------------------------------\n", "This loads the library" ] }, { "cell_type": "code", "execution_count": 3, "id": "c6b8d7c3", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:17.812839Z", "iopub.status.busy": "2026-05-19T20:24:17.812679Z", "iopub.status.idle": "2026-05-19T20:24:18.020386Z", "shell.execute_reply": "2026-05-19T20:24:18.019778Z" } }, "outputs": [], "source": [ "TMVA::Tools::Instance();" ] }, { "cell_type": "markdown", "id": "1cdee9d2", "metadata": {}, "source": [ "Default MVA methods to be trained + tested" ] }, { "cell_type": "code", "execution_count": 4, "id": "baa74604", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:18.022463Z", "iopub.status.busy": "2026-05-19T20:24:18.022346Z", "iopub.status.idle": "2026-05-19T20:24:18.228754Z", "shell.execute_reply": "2026-05-19T20:24:18.227969Z" } }, "outputs": [], "source": [ "std::map Use;" ] }, { "cell_type": "markdown", "id": "4086073e", "metadata": {}, "source": [ "--- Multidimensional likelihood and Nearest-Neighbour methods" ] }, { "cell_type": "code", "execution_count": 5, "id": "e45f9f0e", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:18.230837Z", "iopub.status.busy": "2026-05-19T20:24:18.230717Z", "iopub.status.idle": "2026-05-19T20:24:18.437008Z", "shell.execute_reply": "2026-05-19T20:24:18.436381Z" } }, "outputs": [], "source": [ "Use[\"PDERS\"] = 0;\n", "Use[\"PDEFoam\"] = 
1;\n", "Use[\"KNN\"] = 1;" ] }, { "cell_type": "markdown", "id": "33f67cb4", "metadata": {}, "source": [ "--- Linear Discriminant Analysis" ] }, { "cell_type": "code", "execution_count": 6, "id": "91beae3b", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:18.439166Z", "iopub.status.busy": "2026-05-19T20:24:18.439047Z", "iopub.status.idle": "2026-05-19T20:24:18.645235Z", "shell.execute_reply": "2026-05-19T20:24:18.644640Z" } }, "outputs": [], "source": [ "Use[\"LD\"] = 1;" ] }, { "cell_type": "markdown", "id": "098486a2", "metadata": {}, "source": [ "--- Function Discriminant analysis" ] }, { "cell_type": "code", "execution_count": 7, "id": "2080dadc", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:18.657231Z", "iopub.status.busy": "2026-05-19T20:24:18.657101Z", "iopub.status.idle": "2026-05-19T20:24:18.865055Z", "shell.execute_reply": "2026-05-19T20:24:18.864641Z" } }, "outputs": [], "source": [ "Use[\"FDA_GA\"] = 0;\n", "Use[\"FDA_MC\"] = 0;\n", "Use[\"FDA_MT\"] = 0;\n", "Use[\"FDA_GAMT\"] = 0;" ] }, { "cell_type": "markdown", "id": "eb022dda", "metadata": {}, "source": [ "--- Neural Network" ] }, { "cell_type": "code", "execution_count": 8, "id": "52b43f1b", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:18.878882Z", "iopub.status.busy": "2026-05-19T20:24:18.878745Z", "iopub.status.idle": "2026-05-19T20:24:19.086110Z", "shell.execute_reply": "2026-05-19T20:24:19.085655Z" } }, "outputs": [], "source": [ "Use[\"MLP\"] = 0;" ] }, { "cell_type": "markdown", "id": "1908e82e", "metadata": {}, "source": [ "Deep neural network" ] }, { "cell_type": "code", "execution_count": 9, "id": "06fb2954", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:19.093207Z", "iopub.status.busy": "2026-05-19T20:24:19.093075Z", "iopub.status.idle": "2026-05-19T20:24:19.298145Z", "shell.execute_reply": 
"2026-05-19T20:24:19.297587Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unbalanced braces. This cell was not processed.\n" ] } ], "source": [ "#ifdef R__HAS_TMVAGPU\n", "Use[\"DNN_GPU\"] = 1;\n", "Use[\"DNN_CPU\"] = 0;\n", "#else\n", "Use[\"DNN_GPU\"] = 0;\n", "#ifdef R__HAS_TMVACPU\n", "Use[\"DNN_CPU\"] = 1;\n", "#else\n", "Use[\"DNN_CPU\"] = 0;\n", "#endif\n", "#endif" ] }, { "cell_type": "markdown", "id": "9f7f2b2b", "metadata": {}, "source": [ "--- Support Vector Machine" ] }, { "cell_type": "code", "execution_count": 10, "id": "b7fb426a", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:19.300461Z", "iopub.status.busy": "2026-05-19T20:24:19.300335Z", "iopub.status.idle": "2026-05-19T20:24:19.506880Z", "shell.execute_reply": "2026-05-19T20:24:19.505895Z" } }, "outputs": [], "source": [ "Use[\"SVM\"] = 0;" ] }, { "cell_type": "markdown", "id": "47fe2587", "metadata": {}, "source": [ "--- Boosted Decision Trees" ] }, { "cell_type": "code", "execution_count": 11, "id": "1494eefd", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:19.508402Z", "iopub.status.busy": "2026-05-19T20:24:19.508278Z", "iopub.status.idle": "2026-05-19T20:24:19.736769Z", "shell.execute_reply": "2026-05-19T20:24:19.723345Z" } }, "outputs": [], "source": [ "Use[\"BDT\"] = 0;\n", "Use[\"BDTG\"] = 1;" ] }, { "cell_type": "markdown", "id": "c4f5e5c4", "metadata": {}, "source": [ "---------------------------------------------------------------" ] }, { "cell_type": "code", "execution_count": 12, "id": "a542e2fc", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:19.741343Z", "iopub.status.busy": "2026-05-19T20:24:19.741208Z", "iopub.status.idle": "2026-05-19T20:24:19.951175Z", "shell.execute_reply": "2026-05-19T20:24:19.950628Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "==> Start TMVARegressionApplication\n" ] 
} ], "source": [ "std::cout << std::endl;\n", "std::cout << \"==> Start TMVARegressionApplication\" << std::endl;" ] }, { "cell_type": "markdown", "id": "067bcaad", "metadata": {}, "source": [ "Select methods (don't look at this code - not of interest)" ] }, { "cell_type": "code", "execution_count": 13, "id": "e3b83cae", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:19.955798Z", "iopub.status.busy": "2026-05-19T20:24:19.955664Z", "iopub.status.idle": "2026-05-19T20:24:20.184468Z", "shell.execute_reply": "2026-05-19T20:24:20.176364Z" } }, "outputs": [], "source": [ "if (myMethodList != \"\") {\n", " for (std::map::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;\n", "\n", " std::vector mlist = gTools().SplitString( myMethodList, ',' );\n", " for (UInt_t i=0; i::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << \" \";\n", " std::cout << std::endl;\n", " return;\n", " }\n", " Use[regMethod] = 1;\n", " }\n", "}" ] }, { "cell_type": "markdown", "id": "ab6a0881", "metadata": {}, "source": [ "--------------------------------------------------------------------------------------------------" ] }, { "cell_type": "markdown", "id": "9299fb38", "metadata": {}, "source": [ "--- Create the Reader object" ] }, { "cell_type": "code", "execution_count": 14, "id": "588078db", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:20.187975Z", "iopub.status.busy": "2026-05-19T20:24:20.187849Z", "iopub.status.idle": "2026-05-19T20:24:20.398860Z", "shell.execute_reply": "2026-05-19T20:24:20.398419Z" } }, "outputs": [], "source": [ "TMVA::Reader *reader = new TMVA::Reader( \"!Color:!Silent\" );" ] }, { "cell_type": "markdown", "id": "d45d5b36", "metadata": {}, "source": [ "Create a set of variables and declare them to the reader\n", "- the variable names MUST correspond in name and type to those given in the weight file(s) used" ] }, { "cell_type": "code", 
"execution_count": 15, "id": "3d470cfd", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:20.425426Z", "iopub.status.busy": "2026-05-19T20:24:20.425266Z", "iopub.status.idle": "2026-05-19T20:24:20.627690Z", "shell.execute_reply": "2026-05-19T20:24:20.627006Z" } }, "outputs": [], "source": [ "Float_t var1, var2;\n", "reader->AddVariable( \"var1\", &var1 );\n", "reader->AddVariable( \"var2\", &var2 );" ] }, { "cell_type": "markdown", "id": "7ca8f49b", "metadata": {}, "source": [ "Spectator variables declared in the training have to be added to the reader, too" ] }, { "cell_type": "code", "execution_count": 16, "id": "6339cc36", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:20.637415Z", "iopub.status.busy": "2026-05-19T20:24:20.637282Z", "iopub.status.idle": "2026-05-19T20:24:20.846277Z", "shell.execute_reply": "2026-05-19T20:24:20.845640Z" } }, "outputs": [], "source": [ "Float_t spec1,spec2;\n", "reader->AddSpectator( \"spec1:=var1*2\", &spec1 );\n", "reader->AddSpectator( \"spec2:=var1*3\", &spec2 );" ] }, { "cell_type": "markdown", "id": "377e663f", "metadata": {}, "source": [ "--- Book the MVA methods" ] }, { "cell_type": "code", "execution_count": 17, "id": "1b33085a", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:20.867479Z", "iopub.status.busy": "2026-05-19T20:24:20.867313Z", "iopub.status.idle": "2026-05-19T20:24:21.071599Z", "shell.execute_reply": "2026-05-19T20:24:21.070974Z" } }, "outputs": [], "source": [ "TString dir = \"datasetreg/weights/\";\n", "TString prefix = \"TMVARegression\";" ] }, { "cell_type": "markdown", "id": "e00036b0", "metadata": {}, "source": [ "Book method(s)" ] }, { "cell_type": "code", "execution_count": 18, "id": "7f22469a", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:21.074400Z", "iopub.status.busy": "2026-05-19T20:24:21.074279Z", "iopub.status.idle": 
"2026-05-19T20:24:21.577403Z", "shell.execute_reply": "2026-05-19T20:24:21.573282Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " : Booking \"BDTG method\" of type \"BDT\" from datasetreg/weights/TMVARegression_BDTG.weights.xml.\n", " : Reading weight file: datasetreg/weights/TMVARegression_BDTG.weights.xml\n", "
DataSetInfo : [Default] : Added class \"Regression\"\n", " : Booked classifier \"BDTG\" of type: \"BDT\"\n", " : Booking \"KNN method\" of type \"KNN\" from datasetreg/weights/TMVARegression_KNN.weights.xml.\n", " : Reading weight file: datasetreg/weights/TMVARegression_KNN.weights.xml\n", " : Creating kd-tree with 1000 events\n", " : Computing scale factor for 1d distributions: (ifrac, bottom, top) = (80%, 10%, 90%)\n", "
ModulekNN : Optimizing tree for 2 variables with 1000 values\n", " : Class 1 has 1000 events\n", " : Booked classifier \"KNN\" of type: \"KNN\"\n", " : Booking \"LD method\" of type \"LD\" from datasetreg/weights/TMVARegression_LD.weights.xml.\n", " : Reading weight file: datasetreg/weights/TMVARegression_LD.weights.xml\n", " : Booked classifier \"LD\" of type: \"LD\"\n", " : Booking \"PDEFoam method\" of type \"PDEFoam\" from datasetreg/weights/TMVARegression_PDEFoam.weights.xml.\n", " : Reading weight file: datasetreg/weights/TMVARegression_PDEFoam.weights.xml\n", " : Read foams from file: datasetreg/weights/TMVARegression_PDEFoam.weights_foams.root\n", " : Booked classifier \"PDEFoam\" of type: \"PDEFoam\"\n" ] } ], "source": [ "for (std::map::iterator it = Use.begin(); it != Use.end(); it++) {\n", " if (it->second) {\n", " TString methodName = it->first + \" method\";\n", " TString weightfile = dir + prefix + \"_\" + TString(it->first) + \".weights.xml\";\n", " reader->BookMVA( methodName, weightfile );\n", " }\n", "}" ] }, { "cell_type": "markdown", "id": "f18be2bc", "metadata": {}, "source": [ "Book output histograms" ] }, { "cell_type": "code", "execution_count": 19, "id": "4b32d1c6", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:21.593060Z", "iopub.status.busy": "2026-05-19T20:24:21.592905Z", "iopub.status.idle": "2026-05-19T20:24:21.805979Z", "shell.execute_reply": "2026-05-19T20:24:21.805532Z" } }, "outputs": [], "source": [ "TH1* hists[100];\n", "Int_t nhists = -1;\n", "for (std::map::iterator it = Use.begin(); it != Use.end(); it++) {\n", " TH1* h = new TH1F( it->first.c_str(), TString(it->first) + \" method\", 100, -100, 600 );\n", " if (it->second) hists[++nhists] = h;\n", "}\n", "nhists++;" ] }, { "cell_type": "markdown", "id": "30f3a119", "metadata": {}, "source": [ "Prepare input tree (this must be replaced by your data source)\n", "in this example, there is a toy tree with signal and one with background 
events\n", "we'll later on use only the \"signal\" events for the test in this example." ] }, { "cell_type": "code", "execution_count": 20, "id": "102109f5", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:21.822361Z", "iopub.status.busy": "2026-05-19T20:24:21.822223Z", "iopub.status.idle": "2026-05-19T20:24:22.047741Z", "shell.execute_reply": "2026-05-19T20:24:22.039626Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--- TMVARegressionApp : Using input file: /github/home/ROOT-CI/build/tutorials/machine_learning/data/tmva_reg_example.root\n" ] } ], "source": [ "TFile *input(nullptr);\n", "TString fname = gROOT->GetTutorialDir() + \"/machine_learning/data/tmva_reg_example.root\";\n", "if (!gSystem->AccessPathName( fname )) {\n", " input = TFile::Open( fname ); // check if file in local directory exists\n", "}\n", "if (!input) {\n", " std::cout << \"ERROR: could not open data file\" << std::endl;\n", " exit(1);\n", "}\n", "std::cout << \"--- TMVARegressionApp : Using input file: \" << input->GetName() << std::endl;" ] }, { "cell_type": "markdown", "id": "6e94bdaa", "metadata": {}, "source": [ "--- Event loop" ] }, { "cell_type": "markdown", "id": "64ef7ba6", "metadata": {}, "source": [ "Prepare the tree\n", "- here the variable names have to correspond to the branch names in your tree\n", "- you can use the same variables as above which is slightly faster,\n", "but of course you can use different ones and copy the values inside the event loop" ] }, { "cell_type": "code", "execution_count": 21, "id": "eef4649c", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:22.057337Z", "iopub.status.busy": "2026-05-19T20:24:22.057203Z", "iopub.status.idle": "2026-05-19T20:24:23.426913Z", "shell.execute_reply": "2026-05-19T20:24:23.425678Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--- Select signal sample\n", "--- Processing: 10000 events\n", "--- ... 
Processing event: 0\n", " : Rebuilding Dataset Default\n", "--- ... Processing event: 1000\n", "--- ... Processing event: 2000\n", "--- ... Processing event: 3000\n", "--- ... Processing event: 4000\n", "--- ... Processing event: 5000\n", "--- ... Processing event: 6000\n", "--- ... Processing event: 7000\n", "--- ... Processing event: 8000\n", "--- ... Processing event: 9000\n", "--- End of event loop: Real time 0:00:01, CP time 1.130\n" ] } ], "source": [ "TTree* theTree = (TTree*)input->Get(\"TreeR\");\n", "std::cout << \"--- Select signal sample\" << std::endl;\n", "theTree->SetBranchAddress( \"var1\", &var1 );\n", "theTree->SetBranchAddress( \"var2\", &var2 );\n", "\n", "std::cout << \"--- Processing: \" << theTree->GetEntries() << \" events\" << std::endl;\n", "TStopwatch sw;\n", "sw.Start();\n", "for (Long64_t ievt=0; ievtGetEntries();ievt++) {\n", "\n", " if (ievt%1000 == 0) {\n", " std::cout << \"--- ... Processing event: \" << ievt << std::endl;\n", " }\n", "\n", " theTree->GetEntry(ievt);\n", "\n", " // Retrieve the MVA target values (regression outputs) and fill into histograms\n", " // NOTE: EvaluateRegression(..) 
returns a vector for multi-target regression\n", "\n", " for (Int_t ih=0; ihGetTitle();\n", " Float_t val = (reader->EvaluateRegression( title ))[0];\n", " hists[ih]->Fill( val );\n", " }\n", "}\n", "sw.Stop();\n", "std::cout << \"--- End of event loop: \"; sw.Print();" ] }, { "cell_type": "markdown", "id": "0a2b13a2", "metadata": {}, "source": [ "--- Write histograms" ] }, { "cell_type": "code", "execution_count": 22, "id": "d5d535a2", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:24:23.428497Z", "iopub.status.busy": "2026-05-19T20:24:23.428362Z", "iopub.status.idle": "2026-05-19T20:24:23.634998Z", "shell.execute_reply": "2026-05-19T20:24:23.634512Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--- Created root file: \"TMVARegApp.root\" containing the MVA output histograms\n", "==> TMVARegressionApplication is done!\n", "\n" ] } ], "source": [ "TFile *target = new TFile( \"TMVARegApp.root\",\"RECREATE\" );\n", "for (Int_t ih=0; ihWrite();\n", "target->Close();\n", "\n", "std::cout << \"--- Created root file: \\\"\" << target->GetName()\n", " << \"\\\" containing the MVA output histograms\" << std::endl;\n", "\n", "delete reader;\n", "\n", "std::cout << \"==> TMVARegressionApplication is done!\" << std::endl << std::endl;" ] } ], "metadata": { "kernelspec": { "display_name": "ROOT C++", "language": "c++", "name": "root" }, "language_info": { "codemirror_mode": "text/x-c++src", "file_extension": ".C", "mimetype": " text/x-c++src", "name": "c++" } }, "nbformat": 4, "nbformat_minor": 5 }