Logo ROOT  
Reference Guide
sparsehist.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_hist
3/// Evaluate the performance of THnSparse vs TH1/2/3/nF
4/// for different numbers of dimensions and bins per dimension.
5///
6/// The script calculates the bandwidth for filling and retrieving
7/// bin contents (in million entries per second) for these two
8/// histogramming techniques, where "seconds" is CPU and real time.
9///
10/// The first line of the plots contains the bandwidth based on the
11/// CPU time (THnSpase, TH1/2/3/nF*, ratio), the second line shows
12/// the plots for real time, and the third line shows the fraction of
13/// filled bins and memory used by THnSparse vs. TH1/2/3/nF.
14///
15/// The timing depends on the distribution and the amount of entries
16/// in the histograms; here, a Gaussian distribution (center is
17/// contained in the histograms) is used to fill each histogram with
18/// 1000 entries. The filling and reading is repeated until enough
19/// statistics have been collected.
20///
21/// tutorials/tree/drawsparse.C shows an example for visualizing a
22/// THnSparse. It creates a TTree which is then drawn using
23/// TParallelCoord.
24///
25/// This macro should be run in compiled mode due to the many nested
26/// loops that force CINT to disable its optimization. If run
27/// interpreted one would not benchmark THnSparse but CINT.
28///
29/// Run as:
30/// ~~~{.cpp}
31/// root[0] .L $ROOTSYS/tutorials/hist/sparsehist.C+
32/// root[1] sparsehist()
33/// ~~~
34///
35/// \macro_code
36///
37/// \author Axel.Naumann
38
39#include "TH1.h"
40#include "TH2.h"
41#include "TH3.h"
42#include "THn.h"
43#include "THnSparse.h"
44#include "TStopwatch.h"
45#include "TRandom.h"
46#include "TCanvas.h"
47#include "TFile.h"
48#include "TStyle.h"
49#include "TSystem.h"
50
51#ifndef INT_MAX
52#define INT_MAX std::numeric_limits<int>::max()
53#endif
54
55class TTimeHists {
56public:
57 enum EHist { kHist, kSparse, kNumHist };
58 enum ETime { kReal, kCPU, kNumTime };
59 TTimeHists(Int_t dim, Int_t bins, Long_t num):
60 fValue(0), fDim(dim), fBins(bins), fNum(num),
61 fSparse(0), fHist(0), fHn(0) {}
62 ~TTimeHists();
63 bool Run();
64 Double_t GetTime(EHist hist, ETime time) const {
65 if (time == kReal) return fTime[hist][0];
66 return fTime[hist][1]; }
67 static void SetDebug(Int_t lvl) { fgDebug = lvl; }
68 THnSparse* GetSparse() const { return fSparse; }
69
70protected:
71 void Fill(EHist hist);
72 Double_t Check(EHist hist);
73 void SetupHist(EHist hist);
74 void NextValues();
75 void SetupValues();
76
77private:
78 Double_t* fValue;
79 Int_t fDim;
80 Int_t fBins;
81 Long_t fNum;
82 Double_t fTime[2][2];
83 THnSparse* fSparse;
84 TH1* fHist;
85 THn* fHn;
86 static Int_t fgDebug;
87};
88
89Int_t TTimeHists::fgDebug = 0;
90
91TTimeHists::~TTimeHists()
92{
93 delete [] fValue;
94 delete fSparse;
95 delete fHist;
96 delete fHn;
97}
98
99bool TTimeHists::Run()
100{
101 // run all tests with current settings, and check for identity of content.
102
103 Double_t check[2];
104 Long64_t rep[2];
105 for (int h = 0; h < 2; ++h) {
106 rep[h] = 0;
107 SetupValues();
108 try {
109 TStopwatch w;
110 w.Start();
111 SetupHist((EHist) h);
112 w.Stop();
113 do {
114 w.Start(kFALSE);
115 Fill((EHist) h);
116 check[h] = Check((EHist) h);
117 w.Stop();
118 ++rep[h];
119 } while ((!h && w.RealTime() < 0.1)
120 || (h && rep[0] > 0 && rep[1] < rep[0]));
121
122 fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
123 fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
124
125 if (h == 1 && (fTime[h][0] > 1E20 || fTime[h][1] > 1E20)) {
126 do {
127 // some more cycles:
128 w.Start(kFALSE);
129 Fill((EHist) h);
130 Check((EHist) h);
131 w.Stop();
132 ++rep[h];
133 } while (w.RealTime() < 0.1);
134
135 fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
136 fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
137 }
138
139 if (fTime[h][0] > 1E20) fTime[h][0] = 1E20;
140 if (fTime[h][1] > 1E20) fTime[h][1] = 1E20;
141 }
142 catch (std::exception&) {
143 fTime[h][0] = fTime[h][1] = -1.;
144 check[h] = -1.; // can never be < 1 without exception
145 rep[h] = -1;
146 }
147 }
148 if (check[0] != check[1])
149 if (check[0] != -1.)
150 printf("ERROR: mismatch of histogram (%g) and sparse histogram (%g) for dim=%d, bins=%d!\n",
151 check[0], check[1], fDim, fBins);
152 // else
153 // printf("ERROR: cannot allocate histogram for dim=%d, bins=%d - out of memory!\n",
154 // fDim, fBins);
155 return (check[0] == check[1]);
156}
157
158void TTimeHists::NextValues()
159{
160 for (Int_t d = 0; d < fDim; ++d)
161 fValue[d] = gRandom->Gaus() / 4.;
162}
163
164void TTimeHists::SetupValues()
165{
166 // define fValue
167 if (!fValue) fValue = new Double_t[fDim];
168 gRandom->SetSeed(42);
169}
170
171void TTimeHists::Fill(EHist hist)
172{
173 for (Long_t n = 0; n < fNum; ++n) {
174 NextValues();
175 if (fgDebug > 1) {
176 printf("%ld: fill %s", n, hist == kHist? (fDim < 4 ? "hist" : "arr") : "sparse");
177 for (Int_t d = 0; d < fDim; ++d)
178 printf("[%g]", fValue[d]);
179 printf("\n");
180 }
181 if (hist == kHist) {
182 switch (fDim) {
183 case 1: fHist->Fill(fValue[0]); break;
184 case 2: ((TH2F*)fHist)->Fill(fValue[0], fValue[1]); break;
185 case 3: ((TH3F*)fHist)->Fill(fValue[0], fValue[1], fValue[2]); break;
186 default: fHn->Fill(fValue); break;
187 }
188 } else {
189 fSparse->Fill(fValue);
190 }
191 }
192}
193
194void TTimeHists::SetupHist(EHist hist)
195{
196 if (hist == kHist) {
197 switch (fDim) {
198 case 1: fHist = new TH1F("h1", "h1", fBins, -1., 1.); break;
199 case 2: fHist = new TH2F("h2", "h2", fBins, -1., 1., fBins, -1., 1.); break;
200 case 3: fHist = new TH3F("h3", "h3", fBins, -1., 1., fBins, -1., 1., fBins, -1., 1.); break;
201 default:
202 {
203 MemInfo_t meminfo;
204 gSystem->GetMemInfo(&meminfo);
205 Int_t size = 1;
206 for (Int_t d = 0; d < fDim; ++d) {
207 if ((Int_t)(size * sizeof(Float_t)) > INT_MAX / (fBins + 2)
208 || (meminfo.fMemFree > 0
209 && meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000)))
210 throw std::bad_alloc();
211 size *= (fBins + 2);
212 }
213 if (meminfo.fMemFree > 0
214 && meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000))
215 throw std::bad_alloc();
216 Int_t* bins = new Int_t[fDim];
217 Double_t *xmin = new Double_t[fDim];
218 Double_t *xmax = new Double_t[fDim];
219 for (Int_t d = 0; d < fDim; ++d) {
220 bins[d] = fBins;
221 xmin[d] = -1.;
222 xmax[d] = 1.;
223 }
224 fHn = new THnF("hn", "hn", fDim, bins, xmin, xmax);
225 }
226 }
227 } else {
228 Int_t* bins = new Int_t[fDim];
229 Double_t *xmin = new Double_t[fDim];
230 Double_t *xmax = new Double_t[fDim];
231 for (Int_t d = 0; d < fDim; ++d) {
232 bins[d] = fBins;
233 xmin[d] = -1.;
234 xmax[d] = 1.;
235 }
236 fSparse = new THnSparseF("hs", "hs", fDim, bins, xmin, xmax);
237 }
238}
239
240Double_t TTimeHists::Check(EHist hist)
241{
242 // Check bin content of all bins
243 Double_t check = 0.;
244 Int_t* x = new Int_t[fDim];
245 memset(x, 0, sizeof(Int_t) * fDim);
246
247 if (hist == kHist) {
248 Long_t idx = 0;
249 Long_t size = 1;
250 for (Int_t d = 0; d < fDim; ++d)
251 size *= (fBins + 2);
252 while (x[0] <= fBins + 1) {
253 Double_t v = -1.;
254 if (fDim < 4) {
255 Long_t histidx = x[0];
256 if (fDim == 2) histidx = fHist->GetBin(x[0], x[1]);
257 else if (fDim == 3) histidx = fHist->GetBin(x[0], x[1], x[2]);
258 v = fHist->GetBinContent(histidx);
259 }
260 else v = fHn->GetBinContent(x);
261 Double_t checkx = 0.;
262 if (v)
263 for (Int_t d = 0; d < fDim; ++d)
264 checkx += x[d];
265 check += checkx * v;
266
267 if (fgDebug > 2 || (fgDebug > 1 && v)) {
268 printf("%s%d", fDim < 4 ? "hist" : "arr", fDim);
269 for (Int_t d = 0; d < fDim; ++d)
270 printf("[%d]", x[d]);
271 printf(" = %g\n", v);
272 }
273
274 ++x[fDim - 1];
275 // Adjust the bin idx
276 // no wrapping for dim 0 - it's what we break on!
277 for (Int_t d = fDim - 1; d > 0; --d) {
278 if (x[d] > fBins + 1) {
279 x[d] = 0;
280 ++x[d - 1];
281 }
282 }
283 ++idx;
284 } // while next bin
285 } else {
286 for (Long64_t i = 0; i < fSparse->GetNbins(); ++i) {
287 Double_t v = fSparse->GetBinContent(i, x);
288 Double_t checkx = 0.;
289 for (Int_t d = 0; d < fDim; ++d)
290 checkx += x[d];
291 check += checkx * v;
292
293 if (fgDebug > 1) {
294 printf("sparse%d", fDim);
295 for (Int_t d = 0; d < fDim; ++d)
296 printf("[%d]", x[d]);
297 printf(" = %g\n", v);
298 }
299 }
300 }
301 check /= fNum;
302 if (fgDebug > 0)
303 printf("check %s%d = %g\n", hist == kHist ? (fDim < 4 ? "hist" : "arr") : "sparse", fDim, check);
304 return check;
305}
306
307
308void sparsehist() {
309// Exclude this macro also for Cling as this script requires exception support
310// which is not supported in Cling as of v6.00/00.
311#if defined (__CLING__)
312 printf("Please run this script in compiled mode by running \".x sparsehist.C+\"\n");
313 return;
314#endif
315
316 TH2F* htime[TTimeHists::kNumHist][TTimeHists::kNumTime];
317 for (int h = 0; h < TTimeHists::kNumHist; ++h)
318 for (int t = 0; t < TTimeHists::kNumTime; ++t) {
319 TString name("htime_");
320 if (h == 0) name += "arr";
321 else name += "sp";
322 if (t == 0) name += "_r";
323
324 TString title;
325 title.Form("Throughput (fill,get) %s (%s, 1M entries/sec);dim;bins;1M entries/sec", h == 0 ? "TH1/2/3/nF" : "THnSparseF", t == 0 ? "real" : "CPU");
326 htime[h][t] = new TH2F(name, title, 6, 0.5, 6.5, 10, 5, 105);
327 }
328
329 TH2F* hsparse_mem = new TH2F("hsparse_mem", "Fractional memory usage;dim;bins;fraction of memory used", 6, 0.5, 6.5, 10, 5, 105);
330 TH2F* hsparse_bins = new TH2F("hsparse_bins", "Fractional number of used bins;dim;bins;fraction of filled bins", 6, 0.5, 6.5, 10, 5, 105);
331
332 // TTimeHists::SetDebug(2);
333 Double_t max = -1.;
334 for (Int_t dim = 1; dim < 7; ++dim) {
335 printf("Processing dimension %d", dim);
336 for (Int_t bins = 10; bins <= 100; bins += 10) {
337 TTimeHists timer(dim, bins, /*num*/ 1000);
338 timer.Run();
339 for (int h = 0; h < TTimeHists::kNumHist; ++h) {
340 for (int t = 0; t < TTimeHists::kNumTime; ++t) {
341 Double_t time = timer.GetTime((TTimeHists::EHist)h, (TTimeHists::ETime)t);
342 if (time >= 0.)
343 htime[h][t]->Fill(dim, bins, time);
344 }
345 }
346 hsparse_mem->Fill(dim, bins, timer.GetSparse()->GetSparseFractionMem());
347 hsparse_bins->Fill(dim, bins, timer.GetSparse()->GetSparseFractionBins());
348
349 if (max < timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal))
350 max = timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal);
351 printf(".");
352 fflush(stdout);
353 }
354 printf(" done\n");
355 }
356
357 Double_t markersize = 2.5;
358 hsparse_mem->SetMarkerSize(markersize);
359 hsparse_bins->SetMarkerSize(markersize);
360
361 TH2F* htime_ratio[TTimeHists::kNumTime];
362 for (int t = 0; t < TTimeHists::kNumTime; ++t) {
363 const char* name = t ? "htime_ratio" : "htime_ratio_r";
364 htime_ratio[t] = (TH2F*) htime[TTimeHists::kSparse][t]->Clone(name);
365 TString title;
366 title.Form("Relative speed improvement (%s, 1M entries/sec): sparse/hist;dim;bins;#Delta 1M entries/sec", t == 0 ? "real" : "CPU");
367 htime_ratio[t]->SetTitle(title);
368 htime_ratio[t]->Divide(htime[TTimeHists::kHist][t]);
369 htime_ratio[t]->SetMinimum(0.1);
370 htime_ratio[t]->SetMarkerSize(markersize);
371 }
372
373 TFile* f = new TFile("sparsehist.root","RECREATE");
374
375 TCanvas* canv= new TCanvas("c","c");
376 canv->Divide(3,3);
377
378 gStyle->SetPalette(8,0);
380 gStyle->SetOptStat(0);
381 const char* opt = "TEXT COL";
382
383 for (int t = 0; t < TTimeHists::kNumTime; ++t) {
384 for (int h = 0; h < TTimeHists::kNumHist; ++h) {
385 htime[h][t]->SetMaximum(max);
386 htime[h][t]->SetMarkerSize(markersize);
387 canv->cd(1 + h + 3 * t);
388 htime[h][t]->Draw(opt);
389 htime[h][t]->Write();
390 }
391 canv->cd(3 + t * 3);
392 htime_ratio[t]->Draw(opt); gPad->SetLogz();
393 htime_ratio[t]->Write();
394 }
395
396 canv->cd(7); hsparse_mem->Draw(opt);
397 canv->cd(8); hsparse_bins->Draw(opt);
398 hsparse_mem->Write();
399 hsparse_bins->Write();
400
401 canv->Write();
402
403 delete f;
404}
#define d(i)
Definition: RSha256.hxx:102
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
int Int_t
Definition: RtypesCore.h:43
const Bool_t kFALSE
Definition: RtypesCore.h:90
long Long_t
Definition: RtypesCore.h:52
double Double_t
Definition: RtypesCore.h:57
long long Long64_t
Definition: RtypesCore.h:71
float Float_t
Definition: RtypesCore.h:55
char name[80]
Definition: TGX11.cxx:109
float xmin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
THnSparseT< TArrayF > THnSparseF
Definition: THnSparse.h:220
THnT< Float_t > THnF
Definition: THn.h:240
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
R__EXTERN TStyle * gStyle
Definition: TStyle.h:410
R__EXTERN TSystem * gSystem
Definition: TSystem.h:556
#define gPad
Definition: TVirtualPad.h:287
virtual void SetMarkerSize(Size_t msize=1)
Set the marker size.
Definition: TAttMarker.h:41
The Canvas class.
Definition: TCanvas.h:27
TVirtualPad * cd(Int_t subpadnumber=0)
Set current canvas & pad.
Definition: TCanvas.cxx:701
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:53
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
The TH1 histogram class.
Definition: TH1.h:56
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
Definition: TH1.cxx:6345
virtual void SetMaximum(Double_t maximum=-1111)
Definition: TH1.h:394
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:395
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2998
virtual Bool_t Divide(TF1 *f1, Double_t c1=1)
Performs the operation: this = this/(c1*f1) if errors are defined (see TH1::Sumw2),...
Definition: TH1.cxx:2753
2-D histogram with a float per channel (see TH1 documentation)}
Definition: TH2.h:251
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:294
3-D histogram with a float per channel (see TH1 documentation)}
Definition: TH3.h:267
Efficient multidimensional histogram.
Definition: THnSparse.h:36
Multidimensional histogram.
Definition: THn.h:30
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:796
virtual void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0)
Automatic pad generation by division.
Definition: TPad.cxx:1165
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:263
virtual void SetSeed(ULong_t seed=0)
Set the random generator seed.
Definition: TRandom.cxx:597
Stopwatch class.
Definition: TStopwatch.h:28
Double_t RealTime()
Stop the stopwatch (if it is running) and return the realtime (in seconds) passed between the start a...
Definition: TStopwatch.cxx:110
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Definition: TStopwatch.cxx:58
Double_t CpuTime()
Stop the stopwatch (if it is running) and return the cputime (in seconds) passed between the start an...
Definition: TStopwatch.cxx:125
void Stop()
Stop the stopwatch.
Definition: TStopwatch.cxx:77
Basic string class.
Definition: TString.h:131
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2289
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1590
void SetPaintTextFormat(const char *format="g")
Definition: TStyle.h:367
void SetPalette(Int_t ncolors=kBird, Int_t *colors=0, Float_t alpha=1.)
See TColor::SetPalette.
Definition: TStyle.cxx:1783
virtual int GetMemInfo(MemInfo_t *info) const
Returns ram and swap memory usage info into the MemInfo_t structure.
Definition: TSystem.cxx:2480
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
Int_t fMemFree
Definition: TSystem.h:181