Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
rf615_simulation_based_inference.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_roofit
3## \notebook
4## Use Simulation Based Inference (SBI) in RooFit.
5##
6## This tutorial shows how to use SBI in ROOT. As reference distribution we
7## choose a simple uniform distribution. The target distribution is chosen to
8## be gaussian with different mean values.
9## The classifier is trained to discriminate between the reference and target
10## distribution.
11## We see how the neural networks generalize to unknown mean values.
12##
13## We compare the approach of using the likelihood ratio trick to morphing.
14##
15## An introduction of SBI can be found in https://arxiv.org/pdf/2010.06439.
16##
17## A short recap:
18## The idea of SBI is to fit a surrogate model to the data, in order to really
19## learn the likelihood function instead of calculating it. Therefore, a classifier is trained to discriminate between
20## samples from a target distribution (here the Gaussian) $$x\sim p(x|\theta)$$ and a reference distribution (here the Uniform)
21## $$x\sim p_{ref}(x|\theta)$$.
22##
23## The output of the classifier $$\hat{s}(\theta)$$ is a monotonic function of the likelihood ration and can be turned into an estimate of the likelihood ratio
24## via $$\hat{r}(\theta)=\frac{1-\hat{s}(\theta)}{\hat{s}(\theta)}.$$
25## This is called the likelihood ratio trick.
26##
27## In the end we compare the negative logarithmic likelihoods of the learned, morphed and analytical likelihood with minuit and as a plot.
28##
29## \macro_image
30## \macro_code
31## \macro_output
32##
33## \date July 2024
34## \author Robin Syring
35
36import ROOT
37import numpy as np
38from sklearn.neural_network import MLPClassifier
39
40# The samples used for training the classifier in this tutorial / rescale for more accuracy
41n_samples = 10000
42
43# Kills warning messages
44ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.WARNING)
45
46
47# Morphing as a baseline
48def morphing(setting):
49
50 # Define binning for morphing
51 grid = ROOT.RooMomentMorphFuncND.Grid(ROOT.RooBinning(4, 0.0, 4.0))
52 x_var.setBins(50)
53
54 # Number of 'sampled' gaussians, if you change it, adjust the binning properly
55 n_grid = 5
56
57 for i in range(n_grid):
58 # Define the sampled gausians
59 mu_help = ROOT.RooRealVar(f"mu{i}", f"mu{i}", i)
60 help = ROOT.RooGaussian(f"g{i}", f"g{i}", x_var, mu_help, sigma)
61 workspace.Import(help, Silence=True)
62
63 # Fill the histograms
64 hist = workspace[f"g{i}"].generateBinned([x_var], n_samples)
65
66 # Make sure that every bin is filled and we don't get zero probability
67 for i_bin in range(hist.numEntries()):
68 hist.add(hist.get(i_bin), 1.0)
69
70 # Add the pdf to the workspace
71 workspace.Import(ROOT.RooHistPdf(f"histpdf{i}", f"histpdf{i}", [x_var], hist, 1), Silence=True)
72
73 # Add the pdf to the grid
74 grid.addPdf(workspace[f"histpdf{i}"], i)
75
76 # Create the morphing and add it to the workspace
77 morph_func = ROOT.RooMomentMorphFuncND("morph_func", "morph_func", [mu_var], [x_var], grid, setting)
78 morph = ROOT.RooWrapperPdf("morph", "morph", morph_func, True)
79 workspace.Import(morph, Silence=True)
80
81 # Uncomment to see input plots for the first dimension (you might need to increase the morphed samples)
82 # f1 = x_var.frame(Title="linear morphing;x;pdf", Range=(-4, 8))
83 # for i in range(n_grid):
84 # workspace[f"histpdf{i}"].plotOn(f1)
85 # workspace["morph"].plotOn(f1, LineColor="r")
86 # c0 = ROOT.TCanvas()
87 # f1.Draw()
88 # input() # Wait for user input to proceed
89
90
91# Class used in this case to demonstrate the use of SBI in Root
92class SBI:
93 # Initializing the class SBI
94 def __init__(self, workspace):
95 # Choose the hyperparameters for training the neural network
96 self.classifier = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=1000, random_state=42)
97 self.data_model = None
98 self.data_ref = None
99 self.X_train = None
100 self.y_train = None
101 self.workspace = workspace
102
103 # Defining the target / training data for different values of mean value mu
104 def model_data(self, model, x, mu, n_samples):
105 ws = self.workspace
106 data_test_model = []
107 samples_gaussian = ws[model].generate([ws[x], ws[mu]], n_samples).to_numpy()
108 self._training_mus = samples_gaussian[mu]
109 data_test_model.extend(samples_gaussian[x])
110
111 self.data_model = np.array(data_test_model).reshape(-1, 1)
112
113 # Generating samples for the reference distribution
114 def reference_data(self, model, x, n_samples):
115 ws = self.workspace
116 # Ensuring the normalization with generating as many reference data as target data
117 samples_uniform = ws[model].generate(ws[x], n_samples)
118 data_reference_model = np.array(
119 [samples_uniform.get(i).getRealValue("x") for i in range(samples_uniform.numEntries())]
120 )
121 self.data_ref = data_reference_model.reshape(-1, 1)
122
123 # Bringing the data in the right format for training
124 def preprocessing(self):
125 thetas_model = self._training_mus.reshape(-1, 1)
126 thetas_reference = self._training_mus.reshape(-1, 1)
127 thetas = np.concatenate((thetas_model, thetas_reference), axis=0)
128 X = np.concatenate([self.data_model, self.data_ref])
129 self.y_train = np.concatenate([np.ones(len(self.data_model)), np.zeros(len(self.data_ref))])
130 self.X_train = np.concatenate([X, thetas], axis=1)
131
132 # Train the classifier
133 def train_classifier(self):
134 self.classifier.fit(self.X_train, self.y_train)
135
136
137# Setting the training and toy data samples; the factor 5 to enable a fair comparison to morphing
138n_samples_train = n_samples * 5
139
140
141# Define the "observed" data in a workspace
142def build_ws(mu_observed, sigma):
143 # using a workspace for easier processing inside the class
144 ws = ROOT.RooWorkspace()
145 ws.factory(f"Gaussian::gauss(x[-5,15], mu[0,4], {sigma})")
146 ws.factory("Uniform::uniform(x)")
147 ws["mu"].setVal(mu_observed)
148 ws.Print("v")
149 obs_data = ws["gauss"].generate(ws["x"], 1000)
150 obs_data.SetName("obs_data")
151 ws.Import(obs_data, Silence=True)
152
153 return ws
154
155
156# The "observed" data
157mu_observed = 2.5
158sigma = 1.5
159workspace = build_ws(mu_observed, sigma)
160x_var = workspace["x"]
161mu_var = workspace["mu"]
162gauss = workspace["gauss"]
163uniform = workspace["uniform"]
164obs_data = workspace["obs_data"]
165
166# Training the model
167model = SBI(workspace)
168model.model_data("gauss", "x", "mu", n_samples_train)
169model.reference_data("uniform", "x", n_samples_train)
170model.preprocessing()
171model.train_classifier()
172sbi_model = model
173
174
175# Compute the likelihood ratio of the classifier for analysis purposes
176def learned_likelihood_ratio(x, mu):
177 n = max(len(x), len(mu))
178 X = np.zeros((n, 2))
179 X[:, 0] = x
180 X[:, 1] = mu
181 prob = sbi_model.classifier.predict_proba(X)[:, 1]
182 return prob / (1 - prob)
183
184
185# Compute the learned likelihood ratio
186llhr_learned = ROOT.RooFit.bindFunction("MyBinFunc", learned_likelihood_ratio, x_var, mu_var)
187
188# Compute the real likelihood ratio
189llhr_calc = ROOT.RooFormulaVar("llhr_calc", "x[0] / x[1]", [gauss, uniform])
190
191# Create the exact negative log likelihood functions for Gaussian model
192nll_gauss = gauss.createNLL(obs_data)
193ROOT.SetOwnership(nll_gauss, True)
194
195# Create the learned pdf and NLL sum based on the learned likelihood ratio
196pdf_learned = ROOT.RooWrapperPdf("learned_pdf", "learned_pdf", llhr_learned, True)
197
198nllr_learned = pdf_learned.createNLL(obs_data)
199ROOT.SetOwnership(nllr_learned, True)
200
201# Compute the morphed nll
202morphing(ROOT.RooMomentMorphFuncND.Linear)
203nll_morph = workspace["morph"].createNLL(obs_data)
204ROOT.SetOwnership(nll_morph, True)
205
206# Plot the negative logarithmic summed likelihood
207frame1 = mu_var.frame(Title="NLL of SBI vs. Morphing;mu;NLL", Range=(2.2, 2.8))
208nllr_learned.plotOn(frame1, LineColor="kP6Blue", ShiftToZero=True, Name="learned")
209nll_gauss.plotOn(frame1, LineColor="kP6Blue+1", ShiftToZero=True, Name="gauss")
210ROOT.RooAbsReal.setEvalErrorLoggingMode("Ignore") # Silence some warnings
211nll_morph.plotOn(frame1, LineColor="kP6Blue+2", ShiftToZero=True, Name="morphed")
212ROOT.RooAbsReal.setEvalErrorLoggingMode("PrintErrors")
213
214# Plot the likelihood functions
215frame2 = x_var.frame(Title="Likelihood ratio r(x|#mu=2.5);x;p_{gauss}/p_{uniform}")
216llhr_learned.plotOn(frame2, LineColor="kP6Blue", Name="learned_ratio")
217llhr_calc.plotOn(frame2, LineColor="kP6Blue+1", Name="exact")
218
219# Write the plots into one canvas to show, or into separate canvases for saving.
220single_canvas = True
221
222c = ROOT.TCanvas("", "", 1200 if single_canvas else 600, 600)
223if single_canvas:
224 c.Divide(2)
225 c.cd(1)
226 ROOT.gPad.SetLeftMargin(0.15)
227 frame1.GetYaxis().SetTitleOffset(1.8)
228frame1.Draw()
229
230legend1 = ROOT.TLegend(0.43, 0.63, 0.8, 0.87)
231legend1.SetFillColor(ROOT.kWhite)
232legend1.SetLineColor(ROOT.kWhite)
233legend1.SetTextSize(0.04)
234legend1.AddEntry("learned", "learned (SBI)", "L")
235legend1.AddEntry("gauss", "true NLL", "L")
236legend1.AddEntry("morphed", "moment morphing", "L")
237legend1.Draw()
238
239if single_canvas:
240 c.cd(2)
241 ROOT.gPad.SetLeftMargin(0.15)
242 frame2.GetYaxis().SetTitleOffset(1.8)
243else:
244 c.SaveAs("rf615_plot_1.png")
245 c = ROOT.TCanvas("", "", 600, 600)
246
247frame2.Draw()
248
249legend2 = ROOT.TLegend(0.53, 0.73, 0.87, 0.87)
250legend2.SetFillColor(ROOT.kWhite)
251legend2.SetLineColor(ROOT.kWhite)
252legend2.SetTextSize(0.04)
253legend2.AddEntry("learned_ratio", "learned (SBI)", "L")
254legend2.AddEntry("exact", "true ratio", "L")
255legend2.Draw()
256
257if not single_canvas:
258 c.SaveAs("rf615_plot_2.png")
259
260# Compute the minimum via minuit and display the results
261for nll in [nll_gauss, nllr_learned, nll_morph]:
262 minimizer = ROOT.RooMinimizer(nll)
263 minimizer.setErrorLevel(0.5) # Adjust the error level in the minimization to work with likelihoods
264 minimizer.setPrintLevel(-1)
265 minimizer.minimize("Minuit2")
266 result = minimizer.save()
267 ROOT.SetOwnership(result, True)
268 result.Print()
269
270del nll_morph
271del nllr_learned
272del nll_gauss
273del workspace
274
275import sys
276
277# Hack to bypass ClearProxiedObjects()
278del sys.modules["libROOTPythonizations"]
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len