{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "93fa2560",
   "metadata": {},
   "source": [
    "# TMVACrossValidationApplication\n",
    "This macro provides an example of how to use TMVA for k-folds cross\n",
    "evaluation in application.\n",
    "\n",
    "This requires that CrossValidation was run with a deterministic split, such\n",
    "as `\"...:splitExpr=int([eventID])%int([numFolds]):...\"`.\n",
    "\n",
    "- Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis\n",
    "- Package : TMVA\n",
    "- Root Macro: TMVACrossValidationApplication\n",
    "\n",
    "\n",
    "\n",
    "**Author:** Kim Albertsson (adapted from code originally by Andreas Hoecker) \n",
    "This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Tuesday, May 19, 2026 at 08:24 PM."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "38c7526c",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:24:09.970871Z",
     "iopub.status.busy": "2026-05-19T20:24:09.970754Z",
     "iopub.status.idle": "2026-05-19T20:24:09.974751Z",
     "shell.execute_reply": "2026-05-19T20:24:09.974385Z"
    }
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "#include <cstdlib>\n",
    "#include <iostream>\n",
    "#include <map>\n",
    "#include <string>\n",
    "\n",
    "#include \"TChain.h\"\n",
    "#include \"TFile.h\"\n",
    "#include \"TTree.h\"\n",
    "#include \"TString.h\"\n",
    "#include \"TObjString.h\"\n",
    "#include \"TSystem.h\"\n",
    "#include \"TROOT.h\"\n",
    "#include \"TRandom3.h\"\n",
    "#include \"TH1F.h\"\n",
    "#include \"TCanvas.h\"\n",
    "\n",
    "#include \"TMVA/Factory.h\"\n",
    "#include \"TMVA/DataLoader.h\"\n",
    "#include \"TMVA/Tools.h\"\n",
    "#include \"TMVA/TMVAGui.h\"\n",
    "#include \"TMVA/Reader.h\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "66ec7df1",
   "metadata": {},
   "source": [
    " Helper function to load data into TTrees.\n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3c56e28e",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:24:09.979535Z",
     "iopub.status.busy": "2026-05-19T20:24:09.979397Z",
     "iopub.status.idle": "2026-05-19T20:24:09.992596Z",
     "shell.execute_reply":
"2026-05-19T20:24:09.991421Z"
    }
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "/// Append `nPoints` toy events to `tree` and return that same tree.\n",
    "///\n",
    "/// The tree is expected to already have branches named \"x\", \"y\" and \"eventID\";\n",
    "/// they are temporarily bound to locals of this function. For each event, x and\n",
    "/// then y are drawn from Gaus(offset, scale) with a TRandom3 seeded by `seed`,\n",
    "/// and eventID counts 1, 2, ..., nPoints (the counter restarts on every call).\n",
    "TTree *fillTree(TTree * tree, Int_t nPoints, Double_t offset, Double_t scale, UInt_t seed = 100)\n",
    "{\n",
    "   Float_t xValue = 0;\n",
    "   Float_t yValue = 0;\n",
    "   Int_t id = 0;\n",
    "   TRandom3 generator(seed);\n",
    "\n",
    "   // Point the existing branches at the local buffers for the duration of the fill.\n",
    "   tree->SetBranchAddress(\"eventID\", &id);\n",
    "   tree->SetBranchAddress(\"x\", &xValue);\n",
    "   tree->SetBranchAddress(\"y\", &yValue);\n",
    "\n",
    "   for (Int_t iPoint = 0; iPoint < nPoints; ++iPoint) {\n",
    "      // The ids only need to be uniformly distributed and independent of\n",
    "      // the data, so a plain running counter is sufficient here.\n",
    "      id += 1;\n",
    "\n",
    "      xValue = generator.Gaus(offset, scale);\n",
    "      yValue = generator.Gaus(offset, scale);\n",
    "\n",
    "      tree->Fill();\n",
    "   }\n",
    "\n",
    "   // Important: detach the branches from the local buffers (x, y and eventID)\n",
    "   // before those locals go out of scope.\n",
    "   tree->ResetBranchAddresses();\n",
    "   return tree;\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "935f9584",
   "metadata": {},
   "source": [
    "This loads the library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a1d66938",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:24:09.994477Z",
     "iopub.status.busy": "2026-05-19T20:24:09.994355Z",
     "iopub.status.idle": "2026-05-19T20:24:10.338370Z",
     "shell.execute_reply": "2026-05-19T20:24:10.337206Z"
    }
   },
   "outputs": [],
   "source": [
    "TMVA::Tools::Instance();"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "34ca56f5",
   "metadata": {},
   "source": [
    "Set up the TMVA::Reader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8ccc2ba5",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:24:10.355918Z",
     "iopub.status.busy": "2026-05-19T20:24:10.355780Z",
     "iopub.status.idle": "2026-05-19T20:24:10.582028Z",
     "shell.execute_reply": "2026-05-19T20:24:10.569652Z"
    }
   },
   "outputs": [],
   "source": [
    "// Buffers handed to the reader by address; the tree branches are bound to the\n",
    "// same variables further down.\n",
    "Float_t x;\n",
    "Float_t y;\n",
    "Int_t eventID;\n",
    "\n",
    "TMVA::Reader *reader = new TMVA::Reader(\"!Color:!Silent:!V\");\n",
    "\n",
    "// x and y are the input variables; eventID is registered as a spectator.\n",
    "reader->AddVariable(\"x\", &x);\n",
    "reader->AddVariable(\"y\", &y);\n",
    "reader->AddSpectator(\"eventID\", &eventID);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "71b50b8d",
   "metadata": {},
   "source": [
    "Book the serialised methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "eeadba9a",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:24:10.589130Z",
     "iopub.status.busy": "2026-05-19T20:24:10.588990Z",
     "iopub.status.idle": "2026-05-19T20:24:10.793109Z",
     "shell.execute_reply": "2026-05-19T20:24:10.792686Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " : Booking \"BDTG\" of type \"CrossValidation\" from datasetcv/weights/TMVACrossValidation_BDTG.weights.xml.\n",
      " : Reading weight file: datasetcv/weights/TMVACrossValidation_BDTG.weights.xml\n",
      "
DataSetInfo : [Default] : Added class \"Signal\"\n", "
DataSetInfo : [Default] : Added class \"Background\"\n",
      " : Reading weightfile: datasetcv/weights/TMVACrossValidation_BDTG_fold1.weights.xml\n",
      " : Reading weight file: datasetcv/weights/TMVACrossValidation_BDTG_fold1.weights.xml\n",
      " : Reading weightfile: datasetcv/weights/TMVACrossValidation_BDTG_fold2.weights.xml\n",
      " : Reading weight file: datasetcv/weights/TMVACrossValidation_BDTG_fold2.weights.xml\n",
      " : Booked classifier \"BDTG\" of type: \"CrossValidation\"\n",
      " : Booking \"Fisher\" of type \"CrossValidation\" from datasetcv/weights/TMVACrossValidation_Fisher.weights.xml.\n",
      " : Reading weight file: datasetcv/weights/TMVACrossValidation_Fisher.weights.xml\n",
      " : Reading weightfile: datasetcv/weights/TMVACrossValidation_Fisher_fold1.weights.xml\n",
      " : Reading weight file: datasetcv/weights/TMVACrossValidation_Fisher_fold1.weights.xml\n",
      " : Reading weightfile: datasetcv/weights/TMVACrossValidation_Fisher_fold2.weights.xml\n",
      " : Reading weight file: datasetcv/weights/TMVACrossValidation_Fisher_fold2.weights.xml\n",
      " : Booked classifier \"Fisher\" of type: \"CrossValidation\"\n"
     ]
    }
   ],
   "source": [
    "TString jobname(\"TMVACrossValidation\");\n",
    "\n",
    "// Book each method from its weight file, in the listed order. The weight file\n",
    "// path is datasetcv/weights/<jobname>_<method>.weights.xml.\n",
    "for (const char *name : {\"BDTG\", \"Fisher\"}) {\n",
    "   TString methodName = name;\n",
    "   TString weightfile = TString(\"datasetcv/weights/\") + jobname + \"_\" + methodName + TString(\".weights.xml\");\n",
    "\n",
    "   // TSystem::AccessPathName returns kFALSE when the path *is* accessible.\n",
    "   Bool_t weightfileExists = (gSystem->AccessPathName(weightfile) == kFALSE);\n",
    "   if (!weightfileExists) {\n",
    "      std::cout << \"Weightfile for method \" << methodName << \" not found.\"\n",
    "                   \" Did you run TMVACrossValidation with a specified\"\n",
    "                   \" splitExpr?\" << std::endl;\n",
    "      // NOTE(review): this terminates the process with status 0 although\n",
    "      // booking failed; kept unchanged from the original macro.\n",
    "      exit(0);\n",
    "   }\n",
    "\n",
    "   reader->BookMVA(methodName, weightfile);\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "afabaad2",
   "metadata": {},
   "source": [
    "Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b2267d9c",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:24:10.794853Z",
     "iopub.status.busy": "2026-05-19T20:24:10.794729Z",
     "iopub.status.idle": "2026-05-19T20:24:10.999403Z",
     "shell.execute_reply": "2026-05-19T20:24:10.998444Z"
    }
   },
   "outputs": [],
   "source": [
    "// Create the tree with branches backed by the same x, y and eventID buffers\n",
    "// that were registered with the reader.\n",
    "TTree *tree = new TTree();\n",
    "tree->Branch(\"x\", &x, \"x/F\");\n",
    "tree->Branch(\"y\", &y, \"y/F\");\n",
    "tree->Branch(\"eventID\", &eventID, \"eventID/I\");\n",
    "\n",
    "// Two Gaussian samples of 1000 events each: centred at +1 and at -1, width 1.\n",
    "fillTree(tree, 1000, 1.0, 1.0, 100);\n",
    "fillTree(tree, 1000, -1.0, 1.0, 101);\n",
    "\n",
    "// fillTree() resets the branch addresses before returning, so bind the\n",
    "// branches to the reader's buffers again before reading entries back.\n",
    "tree->SetBranchAddress(\"x\", &x);\n",
    "tree->SetBranchAddress(\"y\", &y);\n",
    "tree->SetBranchAddress(\"eventID\", &eventID);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0430b57e",
   "metadata": {},
   "source": [
    "Prepare histograms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "62452b52",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:24:11.001080Z",
     "iopub.status.busy": "2026-05-19T20:24:11.000952Z",
     "iopub.status.idle": "2026-05-19T20:24:11.207267Z",
     "shell.execute_reply": "2026-05-19T20:24:11.206668Z"
    }
   },
   "outputs": [],
   "source": [
    "// One histogram per booked method: nbin bins over [-1, 1].\n",
    "Int_t nbin = 100;\n",
    "TH1F histBDTG{\"BDTG\", \"BDTG\", nbin, -1, 1};\n",
    "TH1F histFisher{\"Fisher\", \"Fisher\", nbin, -1, 1};"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "81881927",
   "metadata": {},
   "source": [
    "Evaluate classifiers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "88dee993",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2026-05-19T20:24:11.209317Z",
"iopub.status.busy": "2026-05-19T20:24:11.209197Z",
     "iopub.status.idle": "2026-05-19T20:24:11.416325Z",
     "shell.execute_reply": "2026-05-19T20:24:11.415792Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " : Rebuilding Dataset Default\n"
     ]
    }
   ],
   "source": [
    "// The tree is not modified while it is read back, so the entry count is\n",
    "// queried once instead of on every iteration.\n",
    "for (Long64_t ievt = 0, nEntries = tree->GetEntries(); ievt < nEntries; ievt++) {\n",
    "   // Loads this entry into x, y and eventID, the buffers registered with the reader.\n",
    "   tree->GetEntry(ievt);\n",
    "\n",
    "   Double_t valBDTG = reader->EvaluateMVA(\"BDTG\");\n",
    "   Double_t valFisher = reader->EvaluateMVA(\"Fisher\");\n",
    "\n",
    "   histBDTG.Fill(valBDTG);\n",
    "   histFisher.Fill(valFisher);\n",
    "}\n",
    "\n",
    "// Detach the branches from the global buffers before the tree is destroyed.\n",
    "tree->ResetBranchAddresses();\n",
    "delete tree;\n",
    "\n",
    "if (!gROOT->IsBatch()) {\n",
    "   // Interactive session: draw clones of both histograms side by side.\n",
    "   auto c = new TCanvas();\n",
    "   c->Divide(2,1);\n",
    "   c->cd(1);\n",
    "   histBDTG.DrawClone();\n",
    "   c->cd(2);\n",
    "   histFisher.DrawClone();\n",
    "}\n",
    "else\n",
    "{ // Batch mode: write the histograms to an output file instead\n",
    "   TFile *target = new TFile(\"TMVACrossEvaluationApp.root\", \"RECREATE\");\n",
    "   histBDTG.Write();\n",
    "   histFisher.Write();\n",
    "   target->Close();\n",
    "   delete target;\n",
    "}\n",
    "\n",
    "delete reader;\n",
    "\n",
    "return 0;"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ROOT C++",
   "language": "c++",
   "name": "root"
  },
  "language_info": {
   "codemirror_mode": "text/x-c++src",
   "file_extension": ".C",
   "mimetype": "text/x-c++src",
   "name": "c++"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}