Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
JSONTool.cxx
Go to the documentation of this file.
1/// \cond ROOFIT_INTERNAL
2
3/*
4 * Project: RooFit
5 * Authors:
6 * Carsten D. Burgard, DESY/ATLAS, Dec 2021
7 *
8 * Copyright (c) 2022, CERN
9 *
10 * Redistribution and use in source and binary forms,
11 * with or without modification, are permitted according to the terms
12 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
13 */
14
15/** \class RooStats::HistFactory::JSONTool
16 * \ingroup HistFactory
17The RooStats::HistFactory::JSONTool can be used to export a HistFactory
18measurement to HS3 JSON. It is not part of the public user interface, but a
19pretty useful tool for unit test, validating if a measurement object can be
20directly translated to HS3 without going over RooFit. If this translation turns
21out to be important for users, it can be considered in the future to make the
22class part of the public interface.
23*/
24
25#include "./JSONTool.h"
26
28
32
33#include <TH1.h>
34#include <TROOT.h>
35
37
38namespace {
39
40JSONNode &appendNamedChild(JSONNode &node, std::string const &name)
41{
42 static constexpr bool useListsInsteadOfDicts = true;
43
44 if (!useListsInsteadOfDicts) {
45 return node.set_map()[name].set_map();
46 }
47 JSONNode &child = node.set_seq().append_child().set_map();
48 child["name"] << name;
49 return child;
50}
51
52class Domains {
53public:
54 void readVariable(const char *name, double min, double max);
55
56 void writeJSON(RooFit::Detail::JSONNode &) const;
57
58private:
59 class ProductDomain {
60 public:
61 void readVariable(const char *name, double min, double max);
62
63 void writeJSON(RooFit::Detail::JSONNode &) const;
64
65 private:
66 struct ProductDomainElement {
67 double min = 0.0;
68 double max = 0.0;
69 };
70
71 std::map<std::string, ProductDomainElement> _map;
72 };
73
74 std::map<std::string, ProductDomain> _map;
75};
76
77void Domains::readVariable(const char *name, double min, double max)
78{
79 _map["default_domain"].readVariable(name, min, max);
80}
81
82void Domains::writeJSON(RooFit::Detail::JSONNode &node) const
83{
84 for (auto const &domain : _map) {
85 domain.second.writeJSON(appendNamedChild(node, domain.first));
86 }
87}
88void Domains::ProductDomain::readVariable(const char *name, double min, double max)
89{
90 auto &elem = _map[name];
91
92 elem.min = min;
93 elem.max = max;
94}
95void Domains::ProductDomain::writeJSON(RooFit::Detail::JSONNode &node) const
96{
97 node.set_map();
98 node["type"] << "product_domain";
99
100 auto &variablesNode = node["axes"];
101
102 for (auto const &item : _map) {
103 auto const &elem = item.second;
104 RooFit::Detail::JSONNode &varnode = appendNamedChild(variablesNode, item.first);
105 varnode["min"] << elem.min;
106 varnode["max"] << elem.max;
107 }
108}
109
110bool checkRegularBins(const TAxis &ax)
111{
112 double w = ax.GetXmax() - ax.GetXmin();
113 double bw = w / ax.GetNbins();
114 for (int i = 0; i <= ax.GetNbins(); ++i) {
115 if (std::abs(ax.GetBinUpEdge(i) - (ax.GetXmin() + (bw * i))) > w * 1e-6)
116 return false;
117 }
118 return true;
119}
120
121inline void writeAxis(JSONNode &axis, const TAxis &ax)
122{
123 bool regular = (!ax.IsVariableBinSize()) || checkRegularBins(ax);
124 axis.set_map();
125 if (regular) {
126 axis["nbins"] << ax.GetNbins();
127 axis["min"] << ax.GetXmin();
128 axis["max"] << ax.GetXmax();
129 } else {
130 auto &edges = axis["edges"];
131 edges.set_seq();
132 for (int i = 0; i <= ax.GetNbins(); ++i) {
133 edges.append_child() << ax.GetBinUpEdge(i);
134 }
135 }
136}
137
138std::vector<std::string> getObsnames(RooStats::HistFactory::Channel const &c)
139{
140 std::vector<std::string> obsnames{"obs_x_" + c.GetName(), "obs_y_" + c.GetName(), "obs_z_" + c.GetName()};
141 obsnames.resize(c.GetData().GetHisto()->GetDimension());
142 return obsnames;
143}
144
145void writeObservables(const TH1 &h, JSONNode &node, const std::vector<std::string> &varnames)
146{
147 // axes need to be ordered, so this is a sequence and not a map
148 auto &observables = node["axes"].set_seq();
149 auto &x = observables.append_child().set_map();
150 x["name"] << varnames[0];
151 writeAxis(x, *h.GetXaxis());
152 if (h.GetDimension() > 1) {
153 auto &y = observables.append_child().set_map();
154 y["name"] << varnames[1];
155 writeAxis(y, *(h.GetYaxis()));
156 if (h.GetDimension() > 2) {
157 auto &z = observables.append_child().set_map();
158 z["name"] << varnames[2];
159 writeAxis(z, *(h.GetZaxis()));
160 }
161 }
162}
163
164void exportSimpleHistogram(const TH1 &histo, JSONNode &node)
165{
166 node.set_seq();
167 const int nBins = histo.GetNbinsX() * histo.GetNbinsY() * histo.GetNbinsZ();
168 for (int i = 1; i <= nBins; ++i) {
169 const double val = histo.GetBinContent(i);
170 node.append_child() << val;
171 }
172}
173
174void exportHistogram(const TH1 &histo, JSONNode &node, const std::vector<std::string> &varnames,
175 const TH1 *errH = nullptr, bool doWriteObservables = true, bool writeErrors = true)
176{
177 node.set_map();
178 auto &weights = node["contents"].set_seq();
179 JSONNode *errors = nullptr;
180 if (writeErrors) {
181 errors = &node["errors"].set_seq();
182 }
183 if (doWriteObservables) {
184 writeObservables(histo, node, varnames);
185 }
186 const int nBins = histo.GetNbinsX() * histo.GetNbinsY() * histo.GetNbinsZ();
187 for (int i = 1; i <= nBins; ++i) {
188 const double val = histo.GetBinContent(i);
189 weights.append_child() << val;
190 if (writeErrors) {
191 const double err = errH ? val * errH->GetBinContent(i) : histo.GetBinError(i);
192 errors->append_child() << err;
193 }
194 }
195}
196
197void exportSample(const RooStats::HistFactory::Sample &sample, JSONNode &channelNode,
198 std::vector<std::string> const &obsnames)
199{
200 auto &s = appendNamedChild(channelNode["samples"], sample.GetName());
201
202 if (!sample.GetOverallSysList().empty()) {
203 auto &modifiers = s["modifiers"].set_seq();
204 for (const auto &sys : sample.GetOverallSysList()) {
205 auto &node = modifiers.append_child().set_map();
206 node["name"] << sys.GetName();
207 node["type"] << "normsys";
208 auto &data = node["data"];
209 data.set_map();
210 data["lo"] << sys.GetLow();
211 data["hi"] << sys.GetHigh();
212 }
213 }
214
215 if (!sample.GetNormFactorList().empty()) {
216 auto &modifiers = s["modifiers"].set_seq();
217 for (const auto &nf : sample.GetNormFactorList()) {
218 auto &mod = modifiers.append_child().set_map();
219 mod["name"] << nf.GetName();
220 mod["type"] << "normfactor";
221 }
222 auto &mod = modifiers.append_child().set_map();
223 mod["name"] << "Lumi";
224 mod["type"] << "normfactor";
225 mod["constraint_name"] << "lumiConstraint";
226 }
227
228 if (!sample.GetHistoSysList().empty()) {
229 auto &modifiers = s["modifiers"].set_seq();
230 for (size_t i = 0; i < sample.GetHistoSysList().size(); ++i) {
231 auto &sys = sample.GetHistoSysList()[i];
232 auto &node = modifiers.append_child().set_map();
233 node["name"] << sys.GetName();
234 node["type"] << "histosys";
235 auto &data = node["data"].set_map();
236 exportSimpleHistogram(*sys.GetHistoLow(), data["lo"].set_map()["contents"]);
237 exportSimpleHistogram(*sys.GetHistoHigh(), data["hi"].set_map()["contents"]);
238 }
239 }
240
241 if (!sample.GetShapeSysList().empty()) {
242 auto &modifiers = s["modifiers"].set_seq();
243 for (size_t i = 0; i < sample.GetShapeSysList().size(); ++i) {
244 auto &sys = sample.GetShapeSysList()[i];
245 auto &node = modifiers.append_child().set_map();
246 node["name"] << sys.GetName();
247 node["type"] << "shapesys";
248 if (sys.GetConstraintType() == RooStats::HistFactory::Constraint::Gaussian)
249 node["constraint"] << "Gauss";
250 if (sys.GetConstraintType() == RooStats::HistFactory::Constraint::Poisson)
251 node["constraint"] << "Poisson";
252 auto &data = node["data"].set_map();
253 exportSimpleHistogram(*sys.GetErrorHist(), data["vals"]);
254 }
255 }
256
257 auto &tags = s["dict"].set_map();
258 tags["normalizeByTheory"] << sample.GetNormalizeByTheory();
259
260 if (sample.GetStatError().GetActivate()) {
261 RooStats::HistFactory::JSONTool::activateStatError(s);
262 }
263
264 auto &data = s["data"];
265 const bool useStatError = sample.GetStatError().GetActivate() && sample.GetStatError().GetUseHisto();
266 TH1 const *errH = useStatError ? sample.GetStatError().GetErrorHist() : nullptr;
267
268 if (!channelNode.has_child("axes")) {
269 writeObservables(*sample.GetHisto(), channelNode, obsnames);
270 }
271 exportHistogram(*sample.GetHisto(), data, obsnames, errH, false);
272}
273
274void exportChannel(const RooStats::HistFactory::Channel &c, JSONNode &ch)
275{
276 ch["type"] << "histfactory_dist";
277
278 auto &staterr = ch["statError"].set_map();
279 staterr["relThreshold"] << c.GetStatErrorConfig().GetRelErrorThreshold();
280 staterr["constraint"] << RooStats::HistFactory::Constraint::Name(c.GetStatErrorConfig().GetConstraintType());
281
282 const std::vector<std::string> obsnames = getObsnames(c);
283
284 for (const auto &s : c.GetSamples()) {
285 exportSample(s, ch, obsnames);
286 }
287}
288
289void setAttribute(JSONNode &rootnode, const std::string &obj, const std::string &attrib)
290{
291 auto node = &rootnode.get("misc", "ROOT_internal", "attributes").set_map()[obj].set_map();
292 auto &tags = (*node)["tags"];
293 tags.set_seq();
294 tags.append_child() << attrib;
295}
296
297void exportMeasurement(RooStats::HistFactory::Measurement &measurement, JSONNode &rootnode, Domains &domains)
298{
299 using namespace RooStats::HistFactory;
300
301 for (const auto &ch : measurement.GetChannels()) {
302 if (!ch.CheckHistograms())
303 throw std::runtime_error("unable to export histograms, please call CollectHistograms first");
304 }
305
306 // preprocess functions
307 if (!measurement.GetFunctionObjects().empty()) {
308 auto &funclist = rootnode["functions"];
309 for (const auto &func : measurement.GetFunctionObjects()) {
310 auto &f = appendNamedChild(funclist, func.GetName());
311 f["name"] << func.GetName();
312 f["expression"] << func.GetExpression();
313 f["dependents"] << func.GetDependents();
314 f["command"] << func.GetCommand();
315 }
316 }
317
318 auto &pdflist = rootnode["distributions"];
319
320 auto &analysisNode = appendNamedChild(rootnode["analyses"], "simPdf");
321 analysisNode["domains"].set_seq().append_child() << "default_domain";
322
323 auto &analysisPois = analysisNode["parameters_of_interest"].set_seq();
324
325 for (const auto &poi : measurement.GetPOIList()) {
326 analysisPois.append_child() << poi;
327 }
328
329 analysisNode["likelihood"] << measurement.GetName();
330
331 auto &likelihoodNode = appendNamedChild(rootnode["likelihoods"], measurement.GetName());
332 likelihoodNode["distributions"].set_seq();
333 likelihoodNode["data"].set_seq();
334
335 // the simpdf
336 for (const auto &c : measurement.GetChannels()) {
337
338 auto pdfName = std::string("model_") + c.GetName();
339 auto realSumPdfName = c.GetName() + std::string("_model");
340
341 likelihoodNode["distributions"].append_child() << pdfName;
342 likelihoodNode["data"].append_child() << std::string("obsData_") + c.GetName();
343 exportChannel(c, appendNamedChild(pdflist, pdfName));
344 setAttribute(rootnode, realSumPdfName, "BinnedLikelihood");
345 }
346
347 struct VariableInfo {
348 double val = 0.0;
349 double minVal = -5.0;
350 double maxVal = 5.0;
351 bool isConstant = false;
352 bool writeDomain = true;
353 };
354 std::unordered_map<std::string, VariableInfo> variables;
355
356 for (const auto &channel : measurement.GetChannels()) {
357 for (const auto &sample : channel.GetSamples()) {
358 for (const auto &norm : sample.GetNormFactorList()) {
359 auto &info = variables[norm.GetName()];
360 info.val = norm.GetVal();
361 info.minVal = norm.GetLow();
362 info.maxVal = norm.GetHigh();
363 }
364 for (const auto &sys : sample.GetOverallSysList()) {
365 variables[std::string("alpha_") + sys.GetName()] = VariableInfo{};
366 }
367 }
368 }
369 for (const auto &sys : measurement.GetConstantParams()) {
370 auto &info = variables[sys];
371 info.isConstant = true;
372 bool isGamma = sys.find("gamma_") != std::string::npos;
373 // Gammas are 1.0 by default, alphas are 0.0
374 info.val = isGamma ? 1.0 : 0.0;
375 // For the gamma parameters, HistFactory will figure out the ranges
376 // itself based on the template bin contents and errors.
377 info.writeDomain = !isGamma;
378 }
379
380 // the lumi variables
381 {
382 double nominal = measurement.GetLumi();
383 double error = measurement.GetLumi() * measurement.GetLumiRelErr();
384
385 auto &info1 = variables["Lumi"];
386 info1.val = nominal;
387 info1.minVal = 0;
388 info1.maxVal = 10 * nominal;
389 info1.isConstant = true;
390
391 auto &info2 = variables["nominalLumi"];
392 info2.val = nominal;
393 info2.minVal = 0;
394 info2.maxVal = nominal + 10 * error;
395 info2.isConstant = true;
396 }
397
398 JSONNode &varlist = appendNamedChild(rootnode["parameter_points"], "default_values")["parameters"];
399 for (auto const &item : variables) {
400 std::string const &parname = item.first;
401 VariableInfo const &info = item.second;
402
403 auto &v = appendNamedChild(varlist, parname);
404 v["value"] << info.val;
405 if (info.isConstant)
406 v["const"] << true;
407 if (info.writeDomain) {
408 domains.readVariable(parname.c_str(), info.minVal, info.maxVal);
409 }
410 }
411
412 // the data
413 auto &child1 = rootnode.get("misc", "ROOT_internal", "combined_datasets").set_map()["obsData"].set_map();
414 auto &child2 = rootnode.get("misc", "ROOT_internal", "combined_distributions").set_map()["simPdf"].set_map();
415
416 child1["index_cat"] << "channelCat";
417 auto &labels1 = child1["labels"].set_seq();
418 auto &indices1 = child1["indices"].set_seq();
419
420 child2["index_cat"] << "channelCat";
421 auto &labels2 = child2["labels"].set_seq();
422 auto &indices2 = child2["indices"].set_seq();
423 auto &pdfs2 = child2["distributions"].set_seq();
424
425 std::vector<std::string> channelNames;
426 for (const auto &c : measurement.GetChannels()) {
427 labels1.append_child() << c.GetName();
428 indices1.append_child() << int(channelNames.size());
429 labels2.append_child() << c.GetName();
430 indices2.append_child() << int(channelNames.size());
431 pdfs2.append_child() << (std::string("model_") + c.GetName());
432
433 JSONNode &dataOutput = appendNamedChild(rootnode["data"], std::string("obsData_") + c.GetName());
434 dataOutput["type"] << "binned";
435
436 exportHistogram(*c.GetData().GetHisto(), dataOutput, getObsnames(c));
437 channelNames.push_back(c.GetName());
438 }
439
440 auto &modelConfigAux = rootnode.get("misc", "ROOT_internal", "ModelConfigs", "simPdf").set_map();
441 modelConfigAux["combined_data_name"] << "obsData";
442 modelConfigAux["pdfName"] << "simPdf";
443 modelConfigAux["mcName"] << "ModelConfig";
444
445 // Finally write lumi constraint
446 auto &lumiConstraint = appendNamedChild(pdflist, "lumiConstraint");
447 lumiConstraint["mean"] << "nominalLumi";
448 lumiConstraint["sigma"] << (measurement.GetLumi() * measurement.GetLumiRelErr());
449 lumiConstraint["type"] << "gaussian_dist";
450 lumiConstraint["x"] << "Lumi";
451}
452
453std::unique_ptr<RooFit::Detail::JSONTree> createNewJSONTree()
454{
455 std::unique_ptr<RooFit::Detail::JSONTree> tree = RooFit::Detail::JSONTree::create();
456 JSONNode &n = tree->rootnode();
457 n.set_map();
458 auto &metadata = n["metadata"].set_map();
459
460 // add the mandatory hs3 version number
461 metadata["hs3_version"] << "0.1.90";
462
463 // Add information about the ROOT version that was used to generate this file
464 auto &rootInfo = appendNamedChild(metadata["packages"], "ROOT");
465 std::string versionName = gROOT->GetVersion();
466 // We want to consistently use dots such that the version name can be easily
467 // digested automatically.
468 std::replace(versionName.begin(), versionName.end(), '/', '.');
469 rootInfo["version"] << versionName;
470
471 return tree;
472}
473
474} // namespace
475
476void RooStats::HistFactory::JSONTool::PrintJSON(std::ostream &os)
477{
478 std::unique_ptr<RooFit::Detail::JSONTree> tree = createNewJSONTree();
479 auto &rootnode = tree->rootnode();
480 Domains domains;
481 exportMeasurement(_measurement, rootnode, domains);
482 domains.writeJSON(rootnode["domains"]);
483 rootnode.writeJSON(os);
484}
485void RooStats::HistFactory::JSONTool::PrintJSON(std::string const &filename)
486{
487 std::ofstream out(filename);
488 this->PrintJSON(out);
489}
490
491void RooStats::HistFactory::JSONTool::PrintYAML(std::ostream &os)
492{
493 std::unique_ptr<RooFit::Detail::JSONTree> tree = createNewJSONTree();
494 auto &rootnode = tree->rootnode().set_map();
495 Domains domains;
496 exportMeasurement(_measurement, rootnode, domains);
497 domains.writeJSON(rootnode["domains"]);
498 rootnode.writeYML(os);
499}
500
501void RooStats::HistFactory::JSONTool::PrintYAML(std::string const &filename)
502{
503 std::ofstream out(filename);
504 this->PrintYAML(out);
505}
506
507void RooStats::HistFactory::JSONTool::activateStatError(JSONNode &sampleNode)
508{
509 auto &node = sampleNode["modifiers"].set_seq().append_child().set_map();
510 node["name"] << "mcstat";
511 node["type"] << "staterror";
512}
513
514/// \endcond
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define h(i)
Definition RSha256.hxx:106
#define e(i)
Definition RSha256.hxx:103
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t child
char name[80]
Definition TGX11.cxx:110
#define gROOT
Definition TROOT.h:406
virtual JSONNode & set_map()=0
static std::unique_ptr< JSONTree > create()
This class encapsulates all information for the statistical interpretation of one experiment.
Definition Channel.h:30
The RooStats::HistFactory::Measurement class can be used to construct a model by combining multiple R...
Definition Measurement.h:33
double GetLumiRelErr()
retrieve relative uncertainty on luminosity
Definition Measurement.h:93
std::vector< RooStats::HistFactory::PreprocessFunction > & GetFunctionObjects()
get vector of defined function objects
Definition Measurement.h:77
double GetLumi()
retrieve integrated luminosity
Definition Measurement.h:91
std::vector< RooStats::HistFactory::OverallSys > & GetOverallSysList()
Definition Sample.h:108
std::string GetName() const
get name of sample
Definition Sample.h:82
const TH1 * GetHisto() const
Definition Sample.cxx:89
RooStats::HistFactory::StatError & GetStatError()
Definition Sample.h:124
std::vector< RooStats::HistFactory::NormFactor > & GetNormFactorList()
Definition Sample.h:109
std::vector< RooStats::HistFactory::HistoSys > & GetHistoSysList()
Definition Sample.h:110
bool GetNormalizeByTheory() const
does the normalization scale with luminosity
Definition Sample.h:78
std::vector< RooStats::HistFactory::ShapeSys > & GetShapeSysList()
Definition Sample.h:112
const TH1 * GetErrorHist() const
Class to manage histogram axis.
Definition TAxis.h:31
Bool_t IsVariableBinSize() const
Definition TAxis.h:142
Double_t GetXmax() const
Definition TAxis.h:140
Double_t GetXmin() const
Definition TAxis.h:139
Int_t GetNbins() const
Definition TAxis.h:125
virtual Double_t GetBinUpEdge(Int_t bin) const
Return up edge of bin.
Definition TAxis.cxx:528
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:59
virtual Int_t GetNbinsY() const
Definition TH1.h:298
virtual Double_t GetBinError(Int_t bin) const
Return value of error associated to bin number bin.
Definition TH1.cxx:9063
virtual Int_t GetNbinsZ() const
Definition TH1.h:299
virtual Int_t GetNbinsX() const
Definition TH1.h:297
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition TH1.cxx:5061
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
std::string Name(Type type)
__device__ AFloat max(AFloat x, AFloat y)
Definition Kernels.cuh:207
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)