Logo ROOT  
Reference Guide
sparsehist.C File Reference

Detailed Description

Evaluate the performance of THnSparse vs TH1/2/3/nF for different numbers of dimensions and bins per dimension.

The script calculates the bandwidth for filling and retrieving bin contents (in million entries per second) for these two histogramming techniques, where "seconds" is CPU and real time.

The first line of the plots contains the bandwidth based on the CPU time (THnSpase, TH1/2/3/nF*, ratio), the second line shows the plots for real time, and the third line shows the fraction of filled bins and memory used by THnSparse vs. TH1/2/3/nF.

The timing depends on the distribution and the amount of entries in the histograms; here, a Gaussian distribution (center is contained in the histograms) is used to fill each histogram with 1000 entries. The filling and reading is repeated until enough statistics have been collected.

tutorials/tree/drawsparse.C shows an example for visualizing a THnSparse. It creates a TTree which is then drawn using TParallelCoord.

This macro should be run in compiled mode due to the many nested loops that force CINT to disable its optimization. If run interpreted one would not benchmark THnSparse but CINT.

Run as:

root[0] .L $ROOTSYS/tutorials/hist/sparsehist.C+
root[1] sparsehist()
#include "TH1.h"
#include "TH2.h"
#include "TH3.h"
#include "THn.h"
#include "THnSparse.h"
#include "TStopwatch.h"
#include "TRandom.h"
#include "TCanvas.h"
#include "TFile.h"
#include "TStyle.h"
#include "TSystem.h"
#ifndef INT_MAX
#define INT_MAX std::numeric_limits<int>::max()
#endif
class TTimeHists {
public:
enum EHist { kHist, kSparse, kNumHist };
enum ETime { kReal, kCPU, kNumTime };
TTimeHists(Int_t dim, Int_t bins, Long_t num):
fValue(0), fDim(dim), fBins(bins), fNum(num),
fSparse(0), fHist(0), fHn(0) {}
~TTimeHists();
bool Run();
Double_t GetTime(EHist hist, ETime time) const {
if (time == kReal) return fTime[hist][0];
return fTime[hist][1]; }
static void SetDebug(Int_t lvl) { fgDebug = lvl; }
THnSparse* GetSparse() const { return fSparse; }
protected:
void Fill(EHist hist);
Double_t Check(EHist hist);
void SetupHist(EHist hist);
void NextValues();
void SetupValues();
private:
Double_t* fValue;
Int_t fDim;
Int_t fBins;
Long_t fNum;
Double_t fTime[2][2];
THnSparse* fSparse;
TH1* fHist;
THn* fHn;
static Int_t fgDebug;
};
Int_t TTimeHists::fgDebug = 0;
TTimeHists::~TTimeHists()
{
delete [] fValue;
delete fSparse;
delete fHist;
delete fHn;
}
bool TTimeHists::Run()
{
// run all tests with current settings, and check for identity of content.
Double_t check[2];
Long64_t rep[2];
for (int h = 0; h < 2; ++h) {
rep[h] = 0;
SetupValues();
try {
w.Start();
SetupHist((EHist) h);
w.Stop();
do {
w.Start(kFALSE);
Fill((EHist) h);
check[h] = Check((EHist) h);
w.Stop();
++rep[h];
} while ((!h && w.RealTime() < 0.1)
|| (h && rep[0] > 0 && rep[1] < rep[0]));
fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
if (h == 1 && (fTime[h][0] > 1E20 || fTime[h][1] > 1E20)) {
do {
// some more cycles:
w.Start(kFALSE);
Fill((EHist) h);
Check((EHist) h);
w.Stop();
++rep[h];
} while (w.RealTime() < 0.1);
fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
}
if (fTime[h][0] > 1E20) fTime[h][0] = 1E20;
if (fTime[h][1] > 1E20) fTime[h][1] = 1E20;
}
catch (std::exception&) {
fTime[h][0] = fTime[h][1] = -1.;
check[h] = -1.; // can never be < 1 without exception
rep[h] = -1;
}
}
if (check[0] != check[1])
if (check[0] != -1.)
printf("ERROR: mismatch of histogram (%g) and sparse histogram (%g) for dim=%d, bins=%d!\n",
check[0], check[1], fDim, fBins);
// else
// printf("ERROR: cannot allocate histogram for dim=%d, bins=%d - out of memory!\n",
// fDim, fBins);
return (check[0] == check[1]);
}
void TTimeHists::NextValues()
{
for (Int_t d = 0; d < fDim; ++d)
fValue[d] = gRandom->Gaus() / 4.;
}
void TTimeHists::SetupValues()
{
// define fValue
if (!fValue) fValue = new Double_t[fDim];
}
void TTimeHists::Fill(EHist hist)
{
for (Long_t n = 0; n < fNum; ++n) {
NextValues();
if (fgDebug > 1) {
printf("%ld: fill %s", n, hist == kHist? (fDim < 4 ? "hist" : "arr") : "sparse");
for (Int_t d = 0; d < fDim; ++d)
printf("[%g]", fValue[d]);
printf("\n");
}
if (hist == kHist) {
switch (fDim) {
case 1: fHist->Fill(fValue[0]); break;
case 2: ((TH2F*)fHist)->Fill(fValue[0], fValue[1]); break;
case 3: ((TH3F*)fHist)->Fill(fValue[0], fValue[1], fValue[2]); break;
default: fHn->Fill(fValue); break;
}
} else {
fSparse->Fill(fValue);
}
}
}
void TTimeHists::SetupHist(EHist hist)
{
if (hist == kHist) {
switch (fDim) {
case 1: fHist = new TH1F("h1", "h1", fBins, -1., 1.); break;
case 2: fHist = new TH2F("h2", "h2", fBins, -1., 1., fBins, -1., 1.); break;
case 3: fHist = new TH3F("h3", "h3", fBins, -1., 1., fBins, -1., 1., fBins, -1., 1.); break;
default:
{
MemInfo_t meminfo;
gSystem->GetMemInfo(&meminfo);
Int_t size = 1;
for (Int_t d = 0; d < fDim; ++d) {
if ((Int_t)(size * sizeof(Float_t)) > INT_MAX / (fBins + 2)
|| (meminfo.fMemFree > 0
&& meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000)))
throw std::bad_alloc();
size *= (fBins + 2);
}
if (meminfo.fMemFree > 0
&& meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000))
throw std::bad_alloc();
Int_t* bins = new Int_t[fDim];
Double_t *xmin = new Double_t[fDim];
Double_t *xmax = new Double_t[fDim];
for (Int_t d = 0; d < fDim; ++d) {
bins[d] = fBins;
xmin[d] = -1.;
xmax[d] = 1.;
}
fHn = new THnF("hn", "hn", fDim, bins, xmin, xmax);
}
}
} else {
Int_t* bins = new Int_t[fDim];
Double_t *xmin = new Double_t[fDim];
Double_t *xmax = new Double_t[fDim];
for (Int_t d = 0; d < fDim; ++d) {
bins[d] = fBins;
xmin[d] = -1.;
xmax[d] = 1.;
}
fSparse = new THnSparseF("hs", "hs", fDim, bins, xmin, xmax);
}
}
Double_t TTimeHists::Check(EHist hist)
{
// Check bin content of all bins
Double_t check = 0.;
Int_t* x = new Int_t[fDim];
memset(x, 0, sizeof(Int_t) * fDim);
if (hist == kHist) {
Long_t idx = 0;
Long_t size = 1;
for (Int_t d = 0; d < fDim; ++d)
size *= (fBins + 2);
while (x[0] <= fBins + 1) {
Double_t v = -1.;
if (fDim < 4) {
Long_t histidx = x[0];
if (fDim == 2) histidx = fHist->GetBin(x[0], x[1]);
else if (fDim == 3) histidx = fHist->GetBin(x[0], x[1], x[2]);
v = fHist->GetBinContent(histidx);
}
else v = fHn->GetBinContent(x);
Double_t checkx = 0.;
if (v)
for (Int_t d = 0; d < fDim; ++d)
checkx += x[d];
check += checkx * v;
if (fgDebug > 2 || (fgDebug > 1 && v)) {
printf("%s%d", fDim < 4 ? "hist" : "arr", fDim);
for (Int_t d = 0; d < fDim; ++d)
printf("[%d]", x[d]);
printf(" = %g\n", v);
}
++x[fDim - 1];
// Adjust the bin idx
// no wrapping for dim 0 - it's what we break on!
for (Int_t d = fDim - 1; d > 0; --d) {
if (x[d] > fBins + 1) {
x[d] = 0;
++x[d - 1];
}
}
++idx;
} // while next bin
} else {
for (Long64_t i = 0; i < fSparse->GetNbins(); ++i) {
Double_t v = fSparse->GetBinContent(i, x);
Double_t checkx = 0.;
for (Int_t d = 0; d < fDim; ++d)
checkx += x[d];
check += checkx * v;
if (fgDebug > 1) {
printf("sparse%d", fDim);
for (Int_t d = 0; d < fDim; ++d)
printf("[%d]", x[d]);
printf(" = %g\n", v);
}
}
}
check /= fNum;
if (fgDebug > 0)
printf("check %s%d = %g\n", hist == kHist ? (fDim < 4 ? "hist" : "arr") : "sparse", fDim, check);
return check;
}
void sparsehist() {
// Exclude this macro also for Cling as this script requires exception support
// which is not supported in Cling as of v6.00/00.
#if defined (__CLING__)
printf("Please run this script in compiled mode by running \".x sparsehist.C+\"\n");
return;
#endif
TH2F* htime[TTimeHists::kNumHist][TTimeHists::kNumTime];
for (int h = 0; h < TTimeHists::kNumHist; ++h)
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
TString name("htime_");
if (h == 0) name += "arr";
else name += "sp";
if (t == 0) name += "_r";
TString title;
title.Form("Throughput (fill,get) %s (%s, 1M entries/sec);dim;bins;1M entries/sec", h == 0 ? "TH1/2/3/nF" : "THnSparseF", t == 0 ? "real" : "CPU");
htime[h][t] = new TH2F(name, title, 6, 0.5, 6.5, 10, 5, 105);
}
TH2F* hsparse_mem = new TH2F("hsparse_mem", "Fractional memory usage;dim;bins;fraction of memory used", 6, 0.5, 6.5, 10, 5, 105);
TH2F* hsparse_bins = new TH2F("hsparse_bins", "Fractional number of used bins;dim;bins;fraction of filled bins", 6, 0.5, 6.5, 10, 5, 105);
// TTimeHists::SetDebug(2);
Double_t max = -1.;
for (Int_t dim = 1; dim < 7; ++dim) {
printf("Processing dimension %d", dim);
for (Int_t bins = 10; bins <= 100; bins += 10) {
TTimeHists timer(dim, bins, /*num*/ 1000);
timer.Run();
for (int h = 0; h < TTimeHists::kNumHist; ++h) {
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
Double_t time = timer.GetTime((TTimeHists::EHist)h, (TTimeHists::ETime)t);
if (time >= 0.)
htime[h][t]->Fill(dim, bins, time);
}
}
hsparse_mem->Fill(dim, bins, timer.GetSparse()->GetSparseFractionMem());
hsparse_bins->Fill(dim, bins, timer.GetSparse()->GetSparseFractionBins());
if (max < timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal))
max = timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal);
printf(".");
fflush(stdout);
}
printf(" done\n");
}
Double_t markersize = 2.5;
hsparse_mem->SetMarkerSize(markersize);
hsparse_bins->SetMarkerSize(markersize);
TH2F* htime_ratio[TTimeHists::kNumTime];
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
const char* name = t ? "htime_ratio" : "htime_ratio_r";
htime_ratio[t] = (TH2F*) htime[TTimeHists::kSparse][t]->Clone(name);
TString title;
title.Form("Relative speed improvement (%s, 1M entries/sec): sparse/hist;dim;bins;#Delta 1M entries/sec", t == 0 ? "real" : "CPU");
htime_ratio[t]->SetTitle(title);
htime_ratio[t]->Divide(htime[TTimeHists::kHist][t]);
htime_ratio[t]->SetMinimum(0.1);
htime_ratio[t]->SetMarkerSize(markersize);
}
TFile* f = new TFile("sparsehist.root","RECREATE");
TCanvas* canv= new TCanvas("c","c");
canv->Divide(3,3);
const char* opt = "TEXT COL";
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
for (int h = 0; h < TTimeHists::kNumHist; ++h) {
htime[h][t]->SetMaximum(max);
htime[h][t]->SetMarkerSize(markersize);
canv->cd(1 + h + 3 * t);
htime[h][t]->Draw(opt);
htime[h][t]->Write();
}
canv->cd(3 + t * 3);
htime_ratio[t]->Draw(opt); gPad->SetLogz();
htime_ratio[t]->Write();
}
canv->cd(7); hsparse_mem->Draw(opt);
canv->cd(8); hsparse_bins->Draw(opt);
hsparse_mem->Write();
hsparse_bins->Write();
canv->Write();
delete f;
}
#define d(i)
Definition: RSha256.hxx:102
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
int Int_t
Definition: RtypesCore.h:45
const Bool_t kFALSE
Definition: RtypesCore.h:101
long Long_t
Definition: RtypesCore.h:54
float Float_t
Definition: RtypesCore.h:57
double Double_t
Definition: RtypesCore.h:59
long long Long64_t
Definition: RtypesCore.h:80
char name[80]
Definition: TGX11.cxx:110
float xmin
Definition: THbookFile.cxx:95
float xmax
Definition: THbookFile.cxx:95
THnSparseT< TArrayF > THnSparseF
Definition: THnSparse.h:220
THnT< Float_t > THnF
Definition: THn.h:243
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
R__EXTERN TStyle * gStyle
Definition: TStyle.h:414
R__EXTERN TSystem * gSystem
Definition: TSystem.h:560
#define gPad
Definition: TVirtualPad.h:288
virtual void SetMarkerSize(Size_t msize=1)
Set the marker size.
Definition: TAttMarker.h:45
The Canvas class.
Definition: TCanvas.h:23
TVirtualPad * cd(Int_t subpadnumber=0) override
Set current canvas & pad.
Definition: TCanvas.cxx:711
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:577
TH1 is the base class of all histogram classes in ROOT.
Definition: TH1.h:58
void SetTitle(const char *title) override
Change (i.e.
Definition: TH1.cxx:6700
virtual void SetMaximum(Double_t maximum=-1111)
Definition: TH1.h:400
void Draw(Option_t *option="") override
Draw this histogram with options.
Definition: TH1.cxx:3060
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:401
virtual Bool_t Divide(TF1 *f1, Double_t c1=1)
Performs the operation: this = this/(c1*f1) if errors are defined (see TH1::Sumw2),...
Definition: TH1.cxx:2815
2-D histogram with a float per channel (see TH1 documentation)}
Definition: TH2.h:257
Int_t Fill(Double_t) override
Invalid Fill method.
Definition: TH2.cxx:347
3-D histogram with a float per channel (see TH1 documentation)}
Definition: TH3.h:268
Efficient multidimensional histogram.
Definition: THnSparse.h:36
Multidimensional histogram.
Definition: THn.h:30
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:875
void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0) override
Automatic pad generation by division.
Definition: TPad.cxx:1153
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:274
virtual void SetSeed(ULong_t seed=0)
Set the random generator seed.
Definition: TRandom.cxx:608
Stopwatch class.
Definition: TStopwatch.h:28
Basic string class.
Definition: TString.h:136
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2323
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1589
void SetPaintTextFormat(const char *format="g")
Definition: TStyle.h:370
void SetPalette(Int_t ncolors=kBird, Int_t *colors=nullptr, Float_t alpha=1.)
See TColor::SetPalette.
Definition: TStyle.cxx:1781
virtual int GetMemInfo(MemInfo_t *info) const
Returns ram and swap memory usage info into the MemInfo_t structure.
Definition: TSystem.cxx:2488
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
Int_t fMemFree
Definition: TSystem.h:182
Author
Axel.Naumann

Definition in file sparsehist.C.