{ "cells": [ { "cell_type": "markdown", "id": "abfbd710", "metadata": {}, "source": [ "# MultivariateGaussianTest\n", "Comparison of MCMC and PLC in a multi-variate gaussian problem\n", "\n", "This tutorial produces an N-dimensional multivariate Gaussian\n", "with a non-trivial covariance matrix. By default N=4 (called \"dim\").\n", "\n", "A subset of these are considered parameters of interest.\n", "This problem is tractable analytically.\n", "\n", "We use this mainly as a test of Markov Chain Monte Carlo\n", "and we compare the result to the profile likelihood ratio.\n", "\n", "We use the proposal helper to create a customized\n", "proposal function for this problem.\n", "\n", "For N=4 and 2 parameters of interest it takes about 10-20 seconds\n", "and the acceptance rate is 37%\n", "\n", "\n", "\n", "\n", "**Author:** Kevin Belasco, Kyle Cranmer \n", "This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Tuesday, May 19, 2026 at 08:35 PM." ] }, { "cell_type": "code", "execution_count": 1, "id": "1d63c3b8", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:43.761757Z", "iopub.status.busy": "2026-05-19T20:35:43.761637Z", "iopub.status.idle": "2026-05-19T20:35:43.779420Z", "shell.execute_reply": "2026-05-19T20:35:43.778809Z" } }, "outputs": [], "source": [ "%%cpp -d\n", "#include \"RooGlobalFunc.h\"\n", "#include \n", "#include \"TMatrixDSym.h\"\n", "#include \"RooMultiVarGaussian.h\"\n", "#include \"RooArgList.h\"\n", "#include \"RooRealVar.h\"\n", "#include \"TH2F.h\"\n", "#include \"TCanvas.h\"\n", "#include \"RooAbsReal.h\"\n", "#include \"RooFitResult.h\"\n", "#include \"TStopwatch.h\"\n", "#include \"RooStats/MCMCCalculator.h\"\n", "#include \"RooStats/MetropolisHastings.h\"\n", "#include \"RooStats/MarkovChain.h\"\n", "#include \"RooStats/ConfInterval.h\"\n", "#include \"RooStats/MCMCInterval.h\"\n", "#include \"RooStats/MCMCIntervalPlot.h\"\n", "#include \"RooStats/ModelConfig.h\"\n", "#include \"RooStats/ProposalHelper.h\"\n", "#include \"RooStats/ProposalFunction.h\"\n", "#include \"RooStats/PdfProposal.h\"\n", "#include \"RooStats/ProfileLikelihoodCalculator.h\"\n", "#include \"RooStats/LikelihoodIntervalPlot.h\"\n", "#include \"RooStats/LikelihoodInterval.h\"\n", "\n", "using std::cout, std::endl;\n", "using namespace RooFit;\n", "using namespace RooStats;" ] }, { "cell_type": "markdown", "id": "0d03ea50", "metadata": {}, "source": [ " Arguments are defined. " ] }, { "cell_type": "code", "execution_count": 2, "id": "d7ee9932", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:43.780735Z", "iopub.status.busy": "2026-05-19T20:35:43.780597Z", "iopub.status.idle": "2026-05-19T20:35:44.106333Z", "shell.execute_reply": "2026-05-19T20:35:44.105966Z" } }, "outputs": [], "source": [ "Int_t dim = 4;\n", "Int_t nPOI = 2;" ] }, { "cell_type": "markdown", "id": "dda66240", "metadata": {}, "source": [ "let's time this challenging example" ] }, { "cell_type": "code", "execution_count": 3, "id": "569169e6", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:44.111111Z", "iopub.status.busy": "2026-05-19T20:35:44.110995Z", "iopub.status.idle": "2026-05-19T20:35:44.319906Z", "shell.execute_reply": "2026-05-19T20:35:44.319393Z" } }, "outputs": [], "source": [ "TStopwatch t;\n", "t.Start();\n", "\n", "RooArgList xVec;\n", "RooArgList muVec;\n", "RooArgSet poi;" ] }, { "cell_type": "markdown", "id": "eb430045", "metadata": {}, "source": [ "make the observable and means" ] }, { "cell_type": "code", "execution_count": 4, "id": "e026fe7c", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:44.323135Z", "iopub.status.busy": "2026-05-19T20:35:44.323021Z", "iopub.status.idle": "2026-05-19T20:35:44.525024Z", "shell.execute_reply": "2026-05-19T20:35:44.524564Z" } }, "outputs": [], "source": [ "Int_t i, j;\n", "RooRealVar *x;\n", "RooRealVar *mu_x;\n", "for (i = 0; i < dim; i++) {\n", " char *name = Form(\"x%d\", i);\n", " x = new RooRealVar(name, name, 0, -3, 3);\n", " xVec.add(*x);\n", "\n", " char *mu_name = Form(\"mu_x%d\", i);\n", " mu_x = new RooRealVar(mu_name, mu_name, 0, -2, 2);\n", " muVec.add(*mu_x);\n", "}" ] }, { "cell_type": "markdown", "id": "bd9e4672", "metadata": {}, "source": [ "put them into the list of parameters of interest" ] }, { "cell_type": "code", "execution_count": 5, "id": "747f1f71", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:44.544255Z", "iopub.status.busy": "2026-05-19T20:35:44.544122Z", "iopub.status.idle": "2026-05-19T20:35:44.746204Z", "shell.execute_reply": "2026-05-19T20:35:44.745645Z" } }, "outputs": [], "source": [ "for (i = 0; i < nPOI; i++) {\n", " poi.add(*muVec.at(i));\n", "}" ] }, { "cell_type": "markdown", "id": "f7c561b3", "metadata": {}, "source": [ "make a covariance matrix that is all 1's" ] }, { "cell_type": "code", "execution_count": 6, "id": "23399663", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:44.747547Z", "iopub.status.busy": "2026-05-19T20:35:44.747431Z", "iopub.status.idle": "2026-05-19T20:35:44.949573Z", "shell.execute_reply": "2026-05-19T20:35:44.949025Z" } }, "outputs": [], "source": [ "TMatrixDSym cov(dim);\n", "for (i = 0; i < dim; i++) {\n", " for (j = 0; j < dim; j++) {\n", " if (i == j)\n", " cov(i, j) = 3.;\n", " else\n", " cov(i, j) = 1.0;\n", " }\n", "}" ] }, { "cell_type": "markdown", "id": "b9db7061", "metadata": {}, "source": [ "now make the multivariate Gaussian" ] }, { "cell_type": "code", "execution_count": 7, "id": "ede4ac77", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:44.950919Z", "iopub.status.busy": "2026-05-19T20:35:44.950807Z", "iopub.status.idle": "2026-05-19T20:35:45.159656Z", "shell.execute_reply": "2026-05-19T20:35:45.159097Z" } }, "outputs": [], "source": [ "RooMultiVarGaussian mvg(\"mvg\", \"mvg\", xVec, muVec, cov);" ] }, { "cell_type": "markdown", "id": "c4563dcb", "metadata": {}, "source": [ "--------------------\n", "make a toy dataset" ] }, { "cell_type": "code", "execution_count": 8, "id": "6fadac92", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:45.160939Z", "iopub.status.busy": "2026-05-19T20:35:45.160821Z", "iopub.status.idle": "2026-05-19T20:35:45.382293Z", "shell.execute_reply": "2026-05-19T20:35:45.381728Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_55:2:2: warning: 'data' shadows a declaration with the same name in the 'std' namespace; use '::data' to reference this declaration\n", " std::unique_ptr data{mvg.generate(xVec, 100)};\n", " ^\n" ] } ], "source": [ "std::unique_ptr data{mvg.generate(xVec, 100)};" ] }, { "cell_type": "markdown", "id": "b9774161", "metadata": {}, "source": [ "now create the model config for this problem" ] }, { "cell_type": "code", "execution_count": 9, "id": "6777dd02", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:45.387332Z", "iopub.status.busy": "2026-05-19T20:35:45.387193Z", "iopub.status.idle": "2026-05-19T20:35:45.615905Z", "shell.execute_reply": "2026-05-19T20:35:45.606878Z" } }, "outputs": [], "source": [ "RooWorkspace *w = new RooWorkspace(\"MVG\");\n", "ModelConfig modelConfig(w);\n", "modelConfig.SetPdf(mvg);\n", "modelConfig.SetParametersOfInterest(poi);" ] }, { "cell_type": "markdown", "id": "69f0d9ea", "metadata": {}, "source": [ "-------------------------------------------------------\n", "Setup calculators" ] }, { "cell_type": "markdown", "id": "4dfeb206", "metadata": {}, "source": [ "MCMC\n", "we want to setup an efficient proposal function\n", "using the covariance matrix from a fit to the data" ] }, { "cell_type": "code", "execution_count": 10, "id": "c85c1127", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:45.617508Z", "iopub.status.busy": "2026-05-19T20:35:45.617384Z", "iopub.status.idle": "2026-05-19T20:35:45.864880Z", "shell.execute_reply": "2026-05-19T20:35:45.846879Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_57:2:47: error: reference to 'data' is ambiguous\n", " std::unique_ptr fit{mvg.fitTo(*data, Save(true))};\n", " ^\n", "input_line_55:2:30: note: candidate found by name lookup is 'data'\n", " std::unique_ptr data{mvg.generate(xVec, 100)};\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:344:5: note: candidate found by name lookup is 'std::data'\n", " data(initializer_list<_Tp> __il) noexcept\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:312:5: note: candidate found by name lookup is 'std::data'\n", " data(_Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:323:5: note: candidate found by name lookup is 'std::data'\n", " data(const _Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:334:5: note: candidate found by name lookup is 'std::data'\n", " data(_Tp (&__array)[_Nm]) noexcept\n", " ^\n" ] } ], "source": [ "std::unique_ptr fit{mvg.fitTo(*data, Save(true))};\n", "ProposalHelper ph;\n", "ph.SetVariables((RooArgSet &)fit->floatParsFinal());\n", "ph.SetCovMatrix(fit->covarianceMatrix());\n", "ph.SetUpdateProposalParameters(true);\n", "ph.SetCacheSize(100);\n", "ProposalFunction *pdfProp = ph.GetProposalFunction();" ] }, { "cell_type": "markdown", "id": "83d1c178", "metadata": {}, "source": [ "now create the calculator" ] }, { "cell_type": "code", "execution_count": 11, "id": "5f15ea40", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:45.881386Z", "iopub.status.busy": "2026-05-19T20:35:45.881196Z", "iopub.status.idle": "2026-05-19T20:35:46.119729Z", "shell.execute_reply": "2026-05-19T20:35:46.116203Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_58:2:21: error: reference to 'data' is ambiguous\n", " MCMCCalculator mc(*data, modelConfig);\n", " ^\n", "input_line_55:2:30: note: candidate found by name lookup is 'data'\n", " std::unique_ptr data{mvg.generate(xVec, 100)};\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:344:5: note: candidate found by name lookup is 'std::data'\n", " data(initializer_list<_Tp> __il) noexcept\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:312:5: note: candidate found by name lookup is 'std::data'\n", " data(_Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:323:5: note: candidate found by name lookup is 'std::data'\n", " data(const _Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:334:5: note: candidate found by name lookup is 'std::data'\n", " data(_Tp (&__array)[_Nm]) noexcept\n", " ^\n", "input_line_58:7:25: error: use of undeclared identifier 'pdfProp'\n", "mc.SetProposalFunction(*pdfProp);\n", " ^\n" ] } ], "source": [ "MCMCCalculator mc(*data, modelConfig);\n", "mc.SetConfidenceLevel(0.95);\n", "mc.SetNumBurnInSteps(100);\n", "mc.SetNumIters(10000);\n", "mc.SetNumBins(50);\n", "mc.SetProposalFunction(*pdfProp);\n", "\n", "MCMCInterval *mcInt = mc.GetInterval();\n", "RooArgList *poiList = mcInt->GetAxes();" ] }, { "cell_type": "markdown", "id": "76a5a8dd", "metadata": {}, "source": [ "now setup the profile likelihood calculator" ] }, { "cell_type": "code", "execution_count": 12, "id": "81f05363", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:46.130071Z", "iopub.status.busy": "2026-05-19T20:35:46.129922Z", "iopub.status.idle": "2026-05-19T20:35:46.360301Z", "shell.execute_reply": "2026-05-19T20:35:46.354649Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_59:2:35: error: reference to 'data' is ambiguous\n", " ProfileLikelihoodCalculator plc(*data, modelConfig);\n", " ^\n", "input_line_55:2:30: note: candidate found by name lookup is 'data'\n", " std::unique_ptr data{mvg.generate(xVec, 100)};\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:344:5: note: candidate found by name lookup is 'std::data'\n", " data(initializer_list<_Tp> __il) noexcept\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:312:5: note: candidate found by name lookup is 'std::data'\n", " data(_Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:323:5: note: candidate found by name lookup is 'std::data'\n", " data(const _Container& __cont) noexcept(noexcept(__cont.data()))\n", " ^\n", "/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/range_access.h:334:5: note: candidate found by name lookup is 'std::data'\n", " data(_Tp (&__array)[_Nm]) noexcept\n", " ^\n" ] } ], "source": [ "ProfileLikelihoodCalculator plc(*data, modelConfig);\n", "plc.SetConfidenceLevel(0.95);\n", "LikelihoodInterval *plInt = (LikelihoodInterval *)plc.GetInterval();" ] }, { "cell_type": "markdown", "id": "48703f6c", "metadata": {}, "source": [ "make some plots" ] }, { "cell_type": "code", "execution_count": 13, "id": "f4540ca0", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:46.373253Z", "iopub.status.busy": "2026-05-19T20:35:46.373066Z", "iopub.status.idle": "2026-05-19T20:35:46.594994Z", "shell.execute_reply": "2026-05-19T20:35:46.594680Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input_line_60:12:5: error: use of undeclared identifier 'poiList'\n", "if (poiList->getSize() == 1) {\n", " ^\n", "input_line_60:13:34: error: use of undeclared identifier 'poiList'\n", " RooRealVar *p = (RooRealVar *)poiList->at(0);\n", " ^\n", "input_line_60:14:18: error: use of undeclared identifier 'mcInt'\n", " Double_t ll = mcInt->LowerLimit(*p);\n", " ^\n", "input_line_60:15:18: error: use of undeclared identifier 'mcInt'\n", " Double_t ul = mcInt->UpperLimit(*p);\n", " ^\n", "input_line_60:19:5: error: use of undeclared identifier 'poiList'\n", "if (poiList->getSize() == 2) {\n", " ^\n", "input_line_60:20:35: error: use of undeclared identifier 'poiList'\n", " RooRealVar *p0 = (RooRealVar *)poiList->at(0);\n", " ^\n", "input_line_60:21:35: error: use of undeclared identifier 'poiList'\n", " RooRealVar *p1 = (RooRealVar *)poiList->at(1);\n", " ^\n", "input_line_60:23:18: error: use of undeclared identifier 'mcInt'\n", " Double_t ll = mcInt->LowerLimit(*p0);\n", " ^\n", "input_line_60:24:18: error: use of undeclared identifier 'mcInt'\n", " Double_t ul = mcInt->UpperLimit(*p0);\n", " ^\n", "input_line_60:26:9: error: use of undeclared identifier 'mcInt'\n", " ll = mcInt->LowerLimit(*p1);\n", " ^\n", "input_line_60:27:9: error: use of undeclared identifier 'mcInt'\n", " ul = mcInt->UpperLimit(*p1);\n", " ^\n" ] } ], "source": [ "MCMCIntervalPlot mcPlot(*mcInt);\n", "\n", "TCanvas *c1 = new TCanvas();\n", "mcPlot.SetLineColor(kGreen);\n", "mcPlot.SetLineWidth(2);\n", "mcPlot.Draw();\n", "\n", "LikelihoodIntervalPlot plPlot(plInt);\n", "plPlot.Draw(\"same\");\n", "\n", "if (poiList->getSize() == 1) {\n", " RooRealVar *p = (RooRealVar *)poiList->at(0);\n", " Double_t ll = mcInt->LowerLimit(*p);\n", " Double_t ul = mcInt->UpperLimit(*p);\n", " cout << \"MCMC interval: [\" << ll << \", \" << ul << \"]\" << endl;\n", "}\n", "\n", "if (poiList->getSize() == 2) {\n", " RooRealVar *p0 = (RooRealVar *)poiList->at(0);\n", " RooRealVar *p1 = (RooRealVar *)poiList->at(1);\n", " TCanvas *scatter = new TCanvas();\n", " Double_t ll = mcInt->LowerLimit(*p0);\n", " Double_t ul = mcInt->UpperLimit(*p0);\n", " cout << \"MCMC interval on p0: [\" << ll << \", \" << ul << \"]\" << endl;\n", " ll = mcInt->LowerLimit(*p1);\n", " ul = mcInt->UpperLimit(*p1);\n", " cout << \"MCMC interval on p1: [\" << ll << \", \" << ul << \"]\" << endl;\n", "\n", " // MCMC interval on p0: [-0.2, 0.6]\n", " // MCMC interval on p1: [-0.2, 0.6]\n", "\n", " mcPlot.DrawChainScatter(*p0, *p1);\n", " scatter->Update();\n", "}\n", "\n", "t.Print();" ] }, { "cell_type": "markdown", "id": "ae39920b", "metadata": {}, "source": [ "Draw all canvases " ] }, { "cell_type": "code", "execution_count": 14, "id": "52bfdfb3", "metadata": { "collapsed": false, "execution": { "iopub.execute_input": "2026-05-19T20:35:46.598985Z", "iopub.status.busy": "2026-05-19T20:35:46.598859Z", "iopub.status.idle": "2026-05-19T20:35:46.807924Z", "shell.execute_reply": "2026-05-19T20:35:46.807355Z" } }, "outputs": [], "source": [ "gROOT->GetListOfCanvases()->Draw()" ] } ], "metadata": { "kernelspec": { "display_name": "ROOT C++", "language": "c++", "name": "root" }, "language_info": { "codemirror_mode": "text/x-c++src", "file_extension": ".C", "mimetype": " text/x-c++src", "name": "c++" } }, "nbformat": 4, "nbformat_minor": 5 }