Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
rf617_simulation_based_inference_multidimensional.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_roofit
3## \notebook
4## Use Simulation Based Inference (SBI) in multiple dimensions in RooFit.
5##
6## This tutorial shows how to use SBI in higher dimension in ROOT.
7## This tutorial transfers the simple concepts of the 1D case introduced in
8## rf615_simulation_based_inference.py onto the higher dimensional case.
9##
10## Again as reference distribution we
11## choose a simple uniform distribution. The target distribution is chosen to
12## be Gaussian with different mean values.
13## The classifier is trained to discriminate between the reference and target
14## distribution.
15## We see how the neural networks generalize to unknown mean values.
16##
17## Furthermore, we compare SBI to the approach of moment morphing. In this case,
18## we can conclude, that SBI is way more sample eficcient when it comes to
19## estimating the negative log likelihood ratio.
20##
21## For an introductory background see rf615_simulation_based_inference.py
22##
23## \macro_image
24## \macro_code
25## \macro_output
26##
27## \date July 2024
28## \author Robin Syring
29
30import ROOT
31import numpy as np
32from sklearn.neural_network import MLPClassifier
33import itertools
34
35# Kills warning messages
36ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.WARNING)
37
38n_samples_morph = 10000 # Number of samples for morphing
39n_bins = 4 # Number of 'sampled' Gaussians
40n_samples_train = n_samples_morph * n_bins # To have a fair comparison
41
42
43# Morphing as baseline
44def morphing(setting, n_dimensions):
45 # Define binning for morphing
46
47 binning = [ROOT.RooBinning(n_bins, 0.0, n_bins - 1.0) for dim in range(n_dimensions)]
48 grid = ROOT.RooMomentMorphFuncND.Grid(*binning)
49
50 # Set bins for each x variable
51 for x_var in x_vars:
52 x_var.setBins(50)
53
54 # Define mu values as input for morphing for each dimension
55 mu_helps = [ROOT.RooRealVar(f"mu{i}", f"mu{i}", 0.0) for i in range(n_dimensions)]
56
57 # Create a product of Gaussians for all dimensions
58 gaussians = []
59 for j in range(n_dimensions):
60 gaussian = ROOT.RooGaussian(f"gdim{j}", f"gdim{j}", x_vars[j], mu_helps[j], sigmas[j])
61 gaussians.append(gaussian)
62
63 # Create a product PDF for the multidimensional Gaussian
64 gauss_product = ROOT.RooProdPdf("gauss_product", "gauss_product", ROOT.RooArgList(*gaussians))
65
66 templates = dict()
67
68 # Iterate through each tuple
69 for idx, nd_idx in enumerate(itertools.product(range(n_bins), repeat=n_dimensions)):
70 for i_dim in range(n_dimensions):
71 mu_helps[i_dim].setVal(nd_idx[i_dim])
72
73 # Fill the histograms
74 hist = gauss_product.generateBinned(ROOT.RooArgSet(*x_vars), n_samples_morph)
75
76 # Ensure that every bin is filled and there are no zero probabilities
77 for i_bin in range(hist.numEntries()):
78 hist.add(hist.get(i_bin), 1.0)
79
80 templates[nd_idx] = ROOT.RooHistPdf(f"histpdf{idx}", f"histpdf{idx}", ROOT.RooArgSet(*x_vars), hist, 1)
81
82 # Add the PDF to the grid
83 grid.addPdf(templates[nd_idx], *nd_idx)
84
85 # Create the morphing function and add it to the ws
86 morph_func = ROOT.RooMomentMorphFuncND("morph_func", "morph_func", [*mu_vars], [*x_vars], grid, setting)
87 morph_func.setPdfMode(True)
88 morph = ROOT.RooWrapperPdf("morph", "morph", morph_func, True)
89
90 ws.Import(morph)
91
92 # Uncomment to see input plots for the first dimension (you might need to increase the morphed samples)
93 # f1 = x_vars[0].frame()
94 # for i in range(n_bins):
95 # templates[(i, 0)].plotOn(f1)
96 # ws["morph"].plotOn(f1, LineColor="r")
97 # c0 = ROOT.TCanvas()
98 # f1.Draw()
99 # input() # Wait for user input to proceed
100
101
102# Define the observed mean values for the Gaussian distributions
103mu_observed = [2.5, 2.0]
104sigmas = [1.5, 1.5]
105
106
107# Class used in this case to demonstrate the use of SBI in Root
108class SBI:
109 # Initializing the class SBI
110 def __init__(self, ws, n_vars):
111 # Choose the hyperparameters for training the neural network
112 self.classifier = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=1000, random_state=42)
113 self.data_model = None
114 self.data_ref = None
115 self.X_train = None
116 self.y_train = None
117 self.ws = ws
118 self.n_vars = n_vars
119 self._training_mus = None
120 self._reference_mu = None
121
122 # Defining the target / training data for different values of mean value mu
123 def model_data(self, model, x_vars, mu_vars, n_samples):
124 ws = self.ws
125 samples_gaussian = (
126 ws[model].generate([ws[x] for x in x_vars] + [ws[mu] for mu in mu_vars], n_samples).to_numpy()
127 )
128
129 self._training_mus = np.array([samples_gaussian[mu] for mu in mu_vars]).T
130 data_test_model = np.array([samples_gaussian[x] for x in x_vars]).T
131
132 self.data_model = data_test_model.reshape(-1, self.n_vars)
133
134 # Generating samples for the reference distribution
135 def reference_data(self, model, x_vars, mu_vars, n_samples, help_model):
136 ws = self.ws
137 # Ensuring the normalization with generating as many reference data as target data
138 samples_uniform = ws[model].generate([ws[x] for x in x_vars], n_samples)
139 data_reference_model = np.array(
140 [samples_uniform.get(i).getRealValue(x) for x in x_vars for i in range(samples_uniform.numEntries())]
141 )
142
143 self.data_ref = data_reference_model.reshape(-1, self.n_vars)
144
145 samples_mu = ws[help_model].generate([ws[mu] for mu in mu_vars], n_samples)
146 mu_data = np.array(
147 [samples_mu.get(i).getRealValue(mu) for mu in mu_vars for i in range(samples_mu.numEntries())]
148 )
149
150 self._reference_mu = mu_data.reshape(-1, self.n_vars)
151
152 # Bringing the data in the right format for training
153 def preprocessing(self):
154 thetas = np.concatenate((self._training_mus, self._reference_mu))
155 X = np.concatenate([self.data_model, self.data_ref])
156
157 self.y_train = np.concatenate([np.ones(len(self.data_model)), np.zeros(len(self.data_ref))])
158 self.X_train = np.concatenate([X, thetas], axis=1)
159
160 # Train the classifier
161 def train_classifier(self):
162 self.classifier.fit(self.X_train, self.y_train)
163
164
165# Define the "observed" data in a workspace
166def build_ws(mu_observed):
167 n_vars = len(mu_observed)
168 x_vars = [ROOT.RooRealVar(f"x{i}", f"x{i}", -5, 15) for i in range(n_vars)]
169 mu_vars = [ROOT.RooRealVar(f"mu{i}", f"mu{i}", mu_observed[i], 0, 4) for i in range(n_vars)]
170 gaussians = [ROOT.RooGaussian(f"gauss{i}", f"gauss{i}", x_vars[i], mu_vars[i], sigmas[i]) for i in range(n_vars)]
171 uniforms = [ROOT.RooUniform(f"uniform{i}", f"uniform{i}", x_vars[i]) for i in range(n_vars)]
172 uniforms_help = [ROOT.RooUniform(f"uniformh{i}", f"uniformh{i}", mu_vars[i]) for i in range(n_vars)]
173 # Create multi-dimensional PDFs
174 gauss = ROOT.RooProdPdf("gauss", "gauss", ROOT.RooArgList(*gaussians))
175 uniform = ROOT.RooProdPdf("uniform", "uniform", ROOT.RooArgList(*uniforms))
176 uniform_help = ROOT.RooProdPdf("uniform_help", "uniform_help", ROOT.RooArgList(*uniforms_help))
177 obs_data = gauss.generate(ROOT.RooArgSet(*x_vars), n_samples_morph)
178 obs_data.SetName("obs_data")
179
180 # Create and return the workspace
181 ws = ROOT.RooWorkspace()
182 ws.Import(x_vars)
183 ws.Import(mu_vars)
184 ws.Import(gauss)
185 ws.Import(uniform)
186 ws.Import(uniform_help)
187 ws.Import(obs_data)
188
189 return ws
190
191
192# Build the workspace and extract variables
193ws = build_ws(mu_observed)
194
195
196# Export the varibles from ws
197x_vars = [ws[f"x{i}"] for i in range(len(mu_observed))]
198mu_vars = [ws[f"mu{i}"] for i in range(len(mu_observed))]
199
200# Do the morphing
201morphing(ROOT.RooMomentMorphFuncND.Linear, len(mu_observed))
202
203# Calculate the nll for the moprhed distribution
204# TODO: Fix RooAddPdf::fixCoefNormalization(nset) warnings with new CPU backend
205nll_morph = ws["morph"].createNLL(ws["obs_data"], EvalBackend="legacy")
206
207# Initialize the SBI model
208model = SBI(ws, len(mu_observed))
209
210# Generate and preprocess training data
211model.model_data("gauss", [x.GetName() for x in x_vars], [mu.GetName() for mu in mu_vars], n_samples_train)
212model.reference_data(
213 "uniform", [x.GetName() for x in x_vars], [mu.GetName() for mu in mu_vars], n_samples_train, "uniform_help"
214)
215model.preprocessing()
216
217# Train the neural network classifier
218model.train_classifier()
219sbi_model = model
220
221
222# Function to compute the likelihood ratio using the trained classifier
223def learned_likelihood_ratio(*args):
224 n = max(*(len(a) for a in args))
225 X = np.zeros((n, len(args)))
226 for i in range(len(args)):
227 X[:, i] = args[i]
228 prob = sbi_model.classifier.predict_proba(X)[:, 1]
229 return prob / (1.0 - prob)
230
231
232# Create combined variable list for ROOT
233combined_vars = ROOT.RooArgList()
234for var in x_vars + mu_vars:
235 combined_vars.add(var)
236
237# Create a custom likelihood ratio function using the trained classifier
238lhr_learned = ROOT.RooFit.bindFunction("MyBinFunc", learned_likelihood_ratio, combined_vars)
239
240# Calculate the 'analytical' likelihood ratio
241lhr_calc = ROOT.RooFormulaVar("lhr_calc", "x[0] / x[1]", [ws["gauss"], ws["uniform"]])
242
243# Define the 'analytical' negative logarithmic likelihood ratio
244nll_gauss = ws["gauss"].createNLL(ws["obs_data"])
245
246# Create the learned pdf and NLL sum based on the learned likelihood ratio
247pdf_learned = ROOT.RooWrapperPdf("learned_pdf", "learned_pdf", lhr_learned, True)
248
249nllr_learned = pdf_learned.createNLL(ws["obs_data"])
250
251# Plot the learned and analytical summed negativelogarithmic likelihood
252frame1 = mu_vars[0].frame(
253 Title="NLL of SBI vs. Morphing;#mu_{1};NLL",
254 Range=(mu_observed[0] - 1, mu_observed[0] + 1),
255)
256nll_gauss.plotOn(frame1, ShiftToZero=True, LineColor="kP6Blue+1", Name="gauss")
257ROOT.RooAbsReal.setEvalErrorLoggingMode("Ignore") # Silence some warnings
258nll_morph.plotOn(frame1, ShiftToZero=True, LineColor="kP6Blue+2", Name="morph")
259ROOT.RooAbsReal.setEvalErrorLoggingMode("PrintErrors")
260nllr_learned.plotOn(frame1, LineColor="kP6Blue", ShiftToZero=True, Name="learned")
261
262
263# Declare a helper function in ROOT to dereference unique_ptr
264ROOT.gInterpreter.Declare(
265 """
266RooAbsArg &my_deref(std::unique_ptr<RooAbsArg> const& ptr) { return *ptr; }
267"""
268)
269
270# Choose normalization set for lhr_calc to plot over
271norm_set = ROOT.RooArgSet(x_vars)
272lhr_calc_final_ptr = ROOT.RooFit.Detail.compileForNormSet(lhr_calc, norm_set)
273lhr_calc_final = ROOT.my_deref(lhr_calc_final_ptr)
274lhr_calc_final.recursiveRedirectServers(norm_set)
275
276# Plot the likelihood ratio functions
277frame2 = x_vars[0].frame(Title="Likelihood ratio r(x_{1}|#mu_{1}=2.5);x_{1};p_{gauss}/p_{uniform}")
278lhr_learned.plotOn(frame2, LineColor="kP6Blue", Name="learned_ratio")
279lhr_calc_final.plotOn(frame2, LineColor="kP6Blue+1", Name="exact")
280
281# Write the plots into one canvas to show, or into separate canvases for saving.
282single_canvas = True
283
284c = ROOT.TCanvas("", "", 1200 if single_canvas else 600, 600)
285if single_canvas:
286 c.Divide(2)
287 c.cd(1)
288 ROOT.gPad.SetLeftMargin(0.15)
289 frame1.GetYaxis().SetTitleOffset(1.8)
290frame1.Draw()
291
292legend1 = ROOT.TLegend(0.43, 0.63, 0.8, 0.87)
293legend1.SetFillColor(ROOT.kWhite)
294legend1.SetLineColor(ROOT.kWhite)
295legend1.SetTextSize(0.04)
296legend1.AddEntry("learned", "learned (SBI)", "L")
297legend1.AddEntry("gauss", "true NLL", "L")
298legend1.AddEntry("morphed", "moment morphing", "L")
299legend1.Draw()
300
301if single_canvas:
302 c.cd(2)
303 ROOT.gPad.SetLeftMargin(0.15)
304 frame2.GetYaxis().SetTitleOffset(1.8)
305else:
306 c.SaveAs("rf617_plot_1.png")
307 c = ROOT.TCanvas("", "", 600, 600)
308
309frame2.Draw()
310
311legend2 = ROOT.TLegend(0.53, 0.73, 0.87, 0.87)
312legend2.SetFillColor(ROOT.kWhite)
313legend2.SetLineColor(ROOT.kWhite)
314legend2.SetTextSize(0.04)
315legend2.AddEntry("learned_ratio", "learned (SBI)", "L")
316legend2.AddEntry("exact", "true ratio", "L")
317legend2.Draw()
318
319if not single_canvas:
320 c.SaveAs("rf617_plot_2.png")
321
322
323# Use ROOT's minimizer to compute the minimum and display the results
324for nll in [nll_gauss, nllr_learned, nll_morph]:
325 minimizer = ROOT.RooMinimizer(nll)
326 minimizer.setErrorLevel(0.5)
327 minimizer.setPrintLevel(-1)
328 minimizer.minimize("Minuit2")
329 result = minimizer.save()
330 result.Print()
331
332import sys
333
334# Hack to bypass ClearProxiedObjects()
335del sys.modules["libROOTPythonizations"]
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len