Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
hist103_THnSparse_hist.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_hist
3/// Evaluate the performance of THnSparse vs TH1/2/3/nF
4/// for different numbers of dimensions and bins per dimension.
5///
6/// The script calculates the bandwidth for filling and retrieving
7/// bin contents (in million entries per second) for these two
8/// histogramming techniques, where "seconds" is CPU and real time.
9///
10/// The first line of the plots contains the bandwidth based on the
11/// CPU time (THnSpase, TH1/2/3/nF*, ratio), the second line shows
12/// the plots for real time, and the third line shows the fraction of
13/// filled bins and memory used by THnSparse vs. TH1/2/3/nF.
14///
15/// The timing depends on the distribution and the amount of entries
16/// in the histograms; here, a Gaussian distribution (center is
17/// contained in the histograms) is used to fill each histogram with
18/// 1000 entries. The filling and reading is repeated until enough
19/// statistics have been collected.
20///
21/// tutorials/io/tree/tree143_drawsparse.C shows an example for visualizing a
22/// THnSparse. It creates a TTree which is then drawn using
23/// TParallelCoord.
24///
25/// This macro should be run in compiled mode due to the many nested
26/// loops that force CLING to disable its optimization. If run
27/// interpreted one would not benchmark THnSparse but CLING.
28///
29/// Run as:
30/// ~~~{.cpp}
31/// root[0] .L $ROOTSYS/tutorials/hist/hist103_THnSparse_hist.C+
32/// root[1] hist103_THnSparse_hist()
33/// ~~~
34///
35/// \macro_code
36///
37/// \date October 2023
38/// \author Axel.Naumann
39
40#include "TH1.h"
41#include "TH2.h"
42#include "TH3.h"
43#include "THn.h"
44#include "THnSparse.h"
45#include "TStopwatch.h"
46#include "TRandom.h"
47#include "TCanvas.h"
48#include "TFile.h"
49#include "TStyle.h"
50#include "TSystem.h"
51
52#ifndef INT_MAX
53#define INT_MAX std::numeric_limits<int>::max()
54#endif
55
56class TTimeHists {
57public:
58 enum EHist {
59 kHist,
60 kSparse,
62 };
63 enum ETime {
64 kReal,
65 kCPU,
67 };
68 TTimeHists(Int_t dim, Int_t bins, Long_t num)
69 : fValue(nullptr), fDim(dim), fBins(bins), fNum(num), fSparse(nullptr), fHist(nullptr), fHn(nullptr)
70 {
71 }
73 bool Run();
74 Double_t GetTime(EHist hist, ETime time) const
75 {
76 if (time == kReal)
77 return fTime[hist][0];
78 return fTime[hist][1];
79 }
80 static void SetDebug(Int_t lvl) { fgDebug = lvl; }
81 THnSparse *GetSparse() const { return fSparse; }
82
83protected:
84 void Fill(EHist hist);
85 Double_t Check(EHist hist);
86 void SetupHist(EHist hist);
87 void NextValues();
88 void SetupValues();
89
90private:
91 Double_t *fValue;
92 Int_t fDim;
93 Int_t fBins;
94 Long_t fNum;
95 Double_t fTime[2][2];
97 TH1 *fHist;
98 THn *fHn;
99 static Int_t fgDebug;
100};
101
102Int_t TTimeHists::fgDebug = 0;
103
104TTimeHists::~TTimeHists()
105{
106 delete[] fValue;
107 delete fSparse;
108 delete fHist;
109 delete fHn;
110}
111
112bool TTimeHists::Run()
113{
114 // run all tests with current settings, and check for identity of content.
115
116 Double_t check[2];
117 Long64_t rep[2];
118 for (int h = 0; h < 2; ++h) {
119 rep[h] = 0;
120 SetupValues();
121 try {
123 w.Start();
124 SetupHist((EHist)h);
125 w.Stop();
126 do {
127 w.Start(kFALSE);
128 Fill((EHist)h);
129 check[h] = Check((EHist)h);
130 w.Stop();
131 ++rep[h];
132 } while ((!h && w.RealTime() < 0.1) || (h && rep[0] > 0 && rep[1] < rep[0]));
133
134 fTime[h][0] = (1. * fNum * rep[h]) / w.RealTime() / 1E6;
135 fTime[h][1] = (1. * fNum * rep[h]) / w.CpuTime() / 1E6;
136
137 if (h == 1 && (fTime[h][0] > 1E20 || fTime[h][1] > 1E20)) {
138 do {
139 // some more cycles:
140 w.Start(kFALSE);
141 Fill((EHist)h);
142 Check((EHist)h);
143 w.Stop();
144 ++rep[h];
145 } while (w.RealTime() < 0.1);
146
147 fTime[h][0] = (1. * fNum * rep[h]) / w.RealTime() / 1E6;
148 fTime[h][1] = (1. * fNum * rep[h]) / w.CpuTime() / 1E6;
149 }
150
151 if (fTime[h][0] > 1E20)
152 fTime[h][0] = 1E20;
153 if (fTime[h][1] > 1E20)
154 fTime[h][1] = 1E20;
155 } catch (std::exception &) {
156 fTime[h][0] = fTime[h][1] = -1.;
157 check[h] = -1.; // can never be < 1 without exception
158 rep[h] = -1;
159 }
160 }
161 if (check[0] != check[1])
162 if (check[0] != -1.)
163 printf("ERROR: mismatch of histogram (%g) and sparse histogram (%g) for dim=%d, bins=%d!\n", check[0],
164 check[1], fDim, fBins);
165 // else
166 // printf("ERROR: cannot allocate histogram for dim=%d, bins=%d - out of memory!\n",
167 // fDim, fBins);
168 return (check[0] == check[1]);
169}
170
171void TTimeHists::NextValues()
172{
173 for (Int_t d = 0; d < fDim; ++d)
174 fValue[d] = gRandom->Gaus() / 4.;
175}
176
177void TTimeHists::SetupValues()
178{
179 // define fValue
180 if (!fValue)
181 fValue = new Double_t[fDim];
182 gRandom->SetSeed(42);
183}
184
185void TTimeHists::Fill(EHist hist)
186{
187 for (Long_t n = 0; n < fNum; ++n) {
188 NextValues();
189 if (fgDebug > 1) {
190 printf("%ld: fill %s", n, hist == kHist ? (fDim < 4 ? "hist" : "arr") : "sparse");
191 for (Int_t d = 0; d < fDim; ++d)
192 printf("[%g]", fValue[d]);
193 printf("\n");
194 }
195 if (hist == kHist) {
196 switch (fDim) {
197 case 1: fHist->Fill(fValue[0]); break;
198 case 2: ((TH2F *)fHist)->Fill(fValue[0], fValue[1]); break;
199 case 3: ((TH3F *)fHist)->Fill(fValue[0], fValue[1], fValue[2]); break;
200 default: fHn->Fill(fValue); break;
201 }
202 } else {
203 fSparse->Fill(fValue);
204 }
205 }
206}
207
208void TTimeHists::SetupHist(EHist hist)
209{
210 if (hist == kHist) {
211 switch (fDim) {
212 case 1: fHist = new TH1F("h1", "h1", fBins, -1., 1.); break;
213 case 2: fHist = new TH2F("h2", "h2", fBins, -1., 1., fBins, -1., 1.); break;
214 case 3: fHist = new TH3F("h3", "h3", fBins, -1., 1., fBins, -1., 1., fBins, -1., 1.); break;
215 default: {
218 Int_t size = 1;
219 for (Int_t d = 0; d < fDim; ++d) {
220 if ((Int_t)(size * sizeof(Float_t)) > INT_MAX / (fBins + 2) ||
221 (meminfo.fMemFree > 0 && meminfo.fMemFree / 2 < (Int_t)(size * sizeof(Float_t) / 1000 / 1000)))
222 throw std::bad_alloc();
223 size *= (fBins + 2);
224 }
225 if (meminfo.fMemFree > 0 && meminfo.fMemFree / 2 < (Int_t)(size * sizeof(Float_t) / 1000 / 1000))
226 throw std::bad_alloc();
227 Int_t *bins = new Int_t[fDim];
228 Double_t *xmin = new Double_t[fDim];
229 Double_t *xmax = new Double_t[fDim];
230 for (Int_t d = 0; d < fDim; ++d) {
231 bins[d] = fBins;
232 xmin[d] = -1.;
233 xmax[d] = 1.;
234 }
235 fHn = new THnF("hn", "hn", fDim, bins, xmin, xmax);
236 }
237 }
238 } else {
239 Int_t *bins = new Int_t[fDim];
240 Double_t *xmin = new Double_t[fDim];
241 Double_t *xmax = new Double_t[fDim];
242 for (Int_t d = 0; d < fDim; ++d) {
243 bins[d] = fBins;
244 xmin[d] = -1.;
245 xmax[d] = 1.;
246 }
247 fSparse = new THnSparseF("hs", "hs", fDim, bins, xmin, xmax);
248 }
249}
250
251Double_t TTimeHists::Check(EHist hist)
252{
253 // Check bin content of all bins
254 Double_t check = 0.;
255 Int_t *x = new Int_t[fDim];
256 memset(x, 0, sizeof(Int_t) * fDim);
257
258 if (hist == kHist) {
259 Long_t idx = 0;
260 Long_t size = 1;
261 for (Int_t d = 0; d < fDim; ++d)
262 size *= (fBins + 2);
263 while (x[0] <= fBins + 1) {
264 Double_t v = -1.;
265 if (fDim < 4) {
266 Long_t histidx = x[0];
267 if (fDim == 2)
268 histidx = fHist->GetBin(x[0], x[1]);
269 else if (fDim == 3)
270 histidx = fHist->GetBin(x[0], x[1], x[2]);
271 v = fHist->GetBinContent(histidx);
272 } else
273 v = fHn->GetBinContent(x);
274 Double_t checkx = 0.;
275 if (v)
276 for (Int_t d = 0; d < fDim; ++d)
277 checkx += x[d];
278 check += checkx * v;
279
280 if (fgDebug > 2 || (fgDebug > 1 && v)) {
281 printf("%s%d", fDim < 4 ? "hist" : "arr", fDim);
282 for (Int_t d = 0; d < fDim; ++d)
283 printf("[%d]", x[d]);
284 printf(" = %g\n", v);
285 }
286
287 ++x[fDim - 1];
288 // Adjust the bin idx
289 // no wrapping for dim 0 - it's what we break on!
290 for (Int_t d = fDim - 1; d > 0; --d) {
291 if (x[d] > fBins + 1) {
292 x[d] = 0;
293 ++x[d - 1];
294 }
295 }
296 ++idx;
297 } // while next bin
298 } else {
299 for (Long64_t i = 0; i < fSparse->GetNbins(); ++i) {
300 Double_t v = fSparse->GetBinContent(i, x);
301 Double_t checkx = 0.;
302 for (Int_t d = 0; d < fDim; ++d)
303 checkx += x[d];
304 check += checkx * v;
305
306 if (fgDebug > 1) {
307 printf("sparse%d", fDim);
308 for (Int_t d = 0; d < fDim; ++d)
309 printf("[%d]", x[d]);
310 printf(" = %g\n", v);
311 }
312 }
313 }
314 check /= fNum;
315 if (fgDebug > 0)
316 printf("check %s%d = %g\n", hist == kHist ? (fDim < 4 ? "hist" : "arr") : "sparse", fDim, check);
317 return check;
318}
319
321{
322// Exclude this macro also for Cling as this script requires exception support
323// which is not supported in Cling as of v6.00/00.
324#if defined(__CLING__)
325 printf("Please run this script in compiled mode by running \".x hist103_THnSparse_hist.C+\"\n");
326 return;
327#endif
328
329 TH2F *htime[TTimeHists::kNumHist][TTimeHists::kNumTime];
330 for (int h = 0; h < TTimeHists::kNumHist; ++h)
331 for (int t = 0; t < TTimeHists::kNumTime; ++t) {
332 TString name("htime_");
333 if (h == 0)
334 name += "arr";
335 else
336 name += "sp";
337 if (t == 0)
338 name += "_r";
339
340 TString title;
341 title.Form("Throughput (fill,get) %s (%s, 1M entries/sec);dim;bins;1M entries/sec",
342 h == 0 ? "TH1/2/3/nF" : "THnSparseF", t == 0 ? "real" : "CPU");
343 htime[h][t] = new TH2F(name, title, 6, 0.5, 6.5, 10, 5, 105);
344 }
345
347 new TH2F("hsparse_mem", "Fractional memory usage;dim;bins;fraction of memory used", 6, 0.5, 6.5, 10, 5, 105);
348 TH2F *hsparse_bins = new TH2F("hsparse_bins", "Fractional number of used bins;dim;bins;fraction of filled bins", 6,
349 0.5, 6.5, 10, 5, 105);
350
351 // TTimeHists::SetDebug(2);
352 Double_t max = -1.;
353 for (Int_t dim = 1; dim < 7; ++dim) {
354 printf("Processing dimension %d", dim);
355 for (Int_t bins = 10; bins <= 100; bins += 10) {
356 TTimeHists timer(dim, bins, /*num*/ 1000);
357 timer.Run();
358 for (int h = 0; h < TTimeHists::kNumHist; ++h) {
359 for (int t = 0; t < TTimeHists::kNumTime; ++t) {
360 Double_t time = timer.GetTime((TTimeHists::EHist)h, (TTimeHists::ETime)t);
361 if (time >= 0.)
362 htime[h][t]->Fill(dim, bins, time);
363 }
364 }
365 hsparse_mem->Fill(dim, bins, timer.GetSparse()->GetSparseFractionMem());
366 hsparse_bins->Fill(dim, bins, timer.GetSparse()->GetSparseFractionBins());
367
368 if (max < timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal))
369 max = timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal);
370 printf(".");
371 fflush(stdout);
372 }
373 printf(" done\n");
374 }
375
376 Double_t markersize = 2.5;
377 hsparse_mem->SetMarkerSize(markersize);
378 hsparse_bins->SetMarkerSize(markersize);
379
380 TH2F *htime_ratio[TTimeHists::kNumTime];
381 for (int t = 0; t < TTimeHists::kNumTime; ++t) {
382 const char *name = t ? "htime_ratio" : "htime_ratio_r";
383 htime_ratio[t] = (TH2F *)htime[TTimeHists::kSparse][t]->Clone(name);
384 TString title;
385 title.Form("Relative speed improvement (%s, 1M entries/sec): sparse/hist;dim;bins;#Delta 1M entries/sec",
386 t == 0 ? "real" : "CPU");
387 htime_ratio[t]->SetTitle(title);
388 htime_ratio[t]->Divide(htime[TTimeHists::kHist][t]);
389 htime_ratio[t]->SetMinimum(0.1);
390 htime_ratio[t]->SetMarkerSize(markersize);
391 }
392
393 TFile *f = new TFile("sparsehist.root", "RECREATE");
394
395 TCanvas *canv = new TCanvas("c", "c");
396 canv->Divide(3, 3);
397
398 gStyle->SetPalette(8, nullptr);
400 gStyle->SetOptStat(0);
401 const char *opt = "TEXT COL";
402
403 for (int t = 0; t < TTimeHists::kNumTime; ++t) {
404 for (int h = 0; h < TTimeHists::kNumHist; ++h) {
405 htime[h][t]->SetMaximum(max);
406 htime[h][t]->SetMarkerSize(markersize);
407 canv->cd(1 + h + 3 * t);
408 htime[h][t]->Draw(opt);
409 htime[h][t]->Write();
410 }
411 canv->cd(3 + t * 3);
412 htime_ratio[t]->Draw(opt);
413 gPad->SetLogz();
414 htime_ratio[t]->Write();
415 }
416
417 canv->cd(7);
418 hsparse_mem->Draw(opt);
419 canv->cd(8);
420 hsparse_bins->Draw(opt);
421 hsparse_mem->Write();
422 hsparse_bins->Write();
423
424 canv->Write();
425
426 delete f;
427}
#define d(i)
Definition RSha256.hxx:102
#define f(i)
Definition RSha256.hxx:104
#define h(i)
Definition RSha256.hxx:106
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
int Int_t
Definition RtypesCore.h:45
long Long_t
Definition RtypesCore.h:54
float Float_t
Definition RtypesCore.h:57
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
double Double_t
Definition RtypesCore.h:59
long long Long64_t
Definition RtypesCore.h:69
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
THnSparseT< TArrayF > THnSparseF
Definition THnSparse.h:222
THnT< Float_t > THnF
Definition THn.h:243
R__EXTERN TRandom * gRandom
Definition TRandom.h:62
R__EXTERN TStyle * gStyle
Definition TStyle.h:442
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
#define gPad
The Canvas class.
Definition TCanvas.h:23
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:131
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:645
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:59
2-D histogram with a float per channel (see TH1 documentation)
Definition TH2.h:307
3-D histogram with a float per channel (see TH1 documentation)
Definition TH3.h:317
Efficient multidimensional histogram.
Definition THnSparse.h:37
Multidimensional histogram.
Definition THn.h:30
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition TRandom.cxx:275
virtual void SetSeed(ULong_t seed=0)
Set the random generator seed.
Definition TRandom.cxx:615
Stopwatch class.
Definition TStopwatch.h:28
Basic string class.
Definition TString.h:139
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition TString.cxx:2356
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition TStyle.cxx:1642
void SetPaintTextFormat(const char *format="g")
Definition TStyle.h:390
void SetPalette(Int_t ncolors=kBird, Int_t *colors=nullptr, Float_t alpha=1.)
See TColor::SetPalette.
Definition TStyle.cxx:1890
virtual int GetMemInfo(MemInfo_t *info) const
Returns ram and swap memory usage info into the MemInfo_t structure.
Definition TSystem.cxx:2479
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16