Logo ROOT  
Reference Guide
sparsehist.C File Reference

Detailed Description

Evaluate the performance of THnSparse vs TH1/2/3/nF for different numbers of dimensions and bins per dimension.

The script calculates the bandwidth for filling and retrieving bin contents (in million entries per second) for these two histogramming techniques, where "seconds" is CPU and real time.

The first line of the plots contains the bandwidth based on the CPU time (THnSpase, TH1/2/3/nF*, ratio), the second line shows the plots for real time, and the third line shows the fraction of filled bins and memory used by THnSparse vs. TH1/2/3/nF.

The timing depends on the distribution and the amount of entries in the histograms; here, a Gaussian distribution (center is contained in the histograms) is used to fill each histogram with 1000 entries. The filling and reading is repeated until enough statistics have been collected.

tutorials/tree/drawsparse.C shows an example for visualizing a THnSparse. It creates a TTree which is then drawn using TParallelCoord.

This macro should be run in compiled mode due to the many nested loops that force CINT to disable its optimization. If run interpreted one would not benchmark THnSparse but CINT.

Run as:

root[0] .L $ROOTSYS/tutorials/hist/sparsehist.C+
root[1] sparsehist()
#include "TH1.h"
#include "TH2.h"
#include "TH3.h"
#include "THn.h"
#include "THnSparse.h"
#include "TStopwatch.h"
#include "TRandom.h"
#include "TCanvas.h"
#include "TFile.h"
#include "TStyle.h"
#include "TSystem.h"
#ifndef INT_MAX
#define INT_MAX std::numeric_limits<int>::max()
#endif
class TTimeHists {
public:
enum EHist { kHist, kSparse, kNumHist };
enum ETime { kReal, kCPU, kNumTime };
TTimeHists(Int_t dim, Int_t bins, Long_t num):
fValue(0), fDim(dim), fBins(bins), fNum(num),
fSparse(0), fHist(0), fHn(0) {}
~TTimeHists();
bool Run();
Double_t GetTime(EHist hist, ETime time) const {
if (time == kReal) return fTime[hist][0];
return fTime[hist][1]; }
static void SetDebug(Int_t lvl) { fgDebug = lvl; }
THnSparse* GetSparse() const { return fSparse; }
protected:
void Fill(EHist hist);
Double_t Check(EHist hist);
void SetupHist(EHist hist);
void NextValues();
void SetupValues();
private:
Double_t* fValue;
Int_t fDim;
Int_t fBins;
Long_t fNum;
Double_t fTime[2][2];
THnSparse* fSparse;
TH1* fHist;
THn* fHn;
static Int_t fgDebug;
};
Int_t TTimeHists::fgDebug = 0;
TTimeHists::~TTimeHists()
{
delete [] fValue;
delete fSparse;
delete fHist;
delete fHn;
}
bool TTimeHists::Run()
{
// run all tests with current settings, and check for identity of content.
Double_t check[2];
Long64_t rep[2];
for (int h = 0; h < 2; ++h) {
rep[h] = 0;
SetupValues();
try {
w.Start();
SetupHist((EHist) h);
w.Stop();
do {
Fill((EHist) h);
check[h] = Check((EHist) h);
w.Stop();
++rep[h];
} while ((!h && w.RealTime() < 0.1)
|| (h && rep[0] > 0 && rep[1] < rep[0]));
fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
if (h == 1 && (fTime[h][0] > 1E20 || fTime[h][1] > 1E20)) {
do {
// some more cycles:
Fill((EHist) h);
Check((EHist) h);
w.Stop();
++rep[h];
} while (w.RealTime() < 0.1);
fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
}
if (fTime[h][0] > 1E20) fTime[h][0] = 1E20;
if (fTime[h][1] > 1E20) fTime[h][1] = 1E20;
}
catch (std::exception&) {
fTime[h][0] = fTime[h][1] = -1.;
check[h] = -1.; // can never be < 1 without exception
rep[h] = -1;
}
}
if (check[0] != check[1])
if (check[0] != -1.)
printf("ERROR: mismatch of histogram (%g) and sparse histogram (%g) for dim=%d, bins=%d!\n",
check[0], check[1], fDim, fBins);
// else
// printf("ERROR: cannot allocate histogram for dim=%d, bins=%d - out of memory!\n",
// fDim, fBins);
return (check[0] == check[1]);
}
void TTimeHists::NextValues()
{
for (Int_t d = 0; d < fDim; ++d)
fValue[d] = gRandom->Gaus() / 4.;
}
void TTimeHists::SetupValues()
{
// define fValue
if (!fValue) fValue = new Double_t[fDim];
}
void TTimeHists::Fill(EHist hist)
{
for (Long_t n = 0; n < fNum; ++n) {
NextValues();
if (fgDebug > 1) {
printf("%ld: fill %s", n, hist == kHist? (fDim < 4 ? "hist" : "arr") : "sparse");
for (Int_t d = 0; d < fDim; ++d)
printf("[%g]", fValue[d]);
printf("\n");
}
if (hist == kHist) {
switch (fDim) {
case 1: fHist->Fill(fValue[0]); break;
case 2: ((TH2F*)fHist)->Fill(fValue[0], fValue[1]); break;
case 3: ((TH3F*)fHist)->Fill(fValue[0], fValue[1], fValue[2]); break;
default: fHn->Fill(fValue); break;
}
} else {
fSparse->Fill(fValue);
}
}
}
void TTimeHists::SetupHist(EHist hist)
{
if (hist == kHist) {
switch (fDim) {
case 1: fHist = new TH1F("h1", "h1", fBins, -1., 1.); break;
case 2: fHist = new TH2F("h2", "h2", fBins, -1., 1., fBins, -1., 1.); break;
case 3: fHist = new TH3F("h3", "h3", fBins, -1., 1., fBins, -1., 1., fBins, -1., 1.); break;
default:
{
MemInfo_t meminfo;
gSystem->GetMemInfo(&meminfo);
Int_t size = 1;
for (Int_t d = 0; d < fDim; ++d) {
if ((Int_t)(size * sizeof(Float_t)) > INT_MAX / (fBins + 2)
|| (meminfo.fMemFree > 0
&& meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000)))
throw std::bad_alloc();
size *= (fBins + 2);
}
if (meminfo.fMemFree > 0
&& meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000))
throw std::bad_alloc();
Int_t* bins = new Int_t[fDim];
Double_t *xmin = new Double_t[fDim];
Double_t *xmax = new Double_t[fDim];
for (Int_t d = 0; d < fDim; ++d) {
bins[d] = fBins;
xmin[d] = -1.;
xmax[d] = 1.;
}
fHn = new THnF("hn", "hn", fDim, bins, xmin, xmax);
}
}
} else {
Int_t* bins = new Int_t[fDim];
Double_t *xmin = new Double_t[fDim];
Double_t *xmax = new Double_t[fDim];
for (Int_t d = 0; d < fDim; ++d) {
bins[d] = fBins;
xmin[d] = -1.;
xmax[d] = 1.;
}
fSparse = new THnSparseF("hs", "hs", fDim, bins, xmin, xmax);
}
}
Double_t TTimeHists::Check(EHist hist)
{
// Check bin content of all bins
Double_t check = 0.;
Int_t* x = new Int_t[fDim];
memset(x, 0, sizeof(Int_t) * fDim);
if (hist == kHist) {
Long_t idx = 0;
Long_t size = 1;
for (Int_t d = 0; d < fDim; ++d)
size *= (fBins + 2);
while (x[0] <= fBins + 1) {
Double_t v = -1.;
if (fDim < 4) {
Long_t histidx = x[0];
if (fDim == 2) histidx = fHist->GetBin(x[0], x[1]);
else if (fDim == 3) histidx = fHist->GetBin(x[0], x[1], x[2]);
v = fHist->GetBinContent(histidx);
}
else v = fHn->GetBinContent(x);
Double_t checkx = 0.;
if (v)
for (Int_t d = 0; d < fDim; ++d)
checkx += x[d];
check += checkx * v;
if (fgDebug > 2 || (fgDebug > 1 && v)) {
printf("%s%d", fDim < 4 ? "hist" : "arr", fDim);
for (Int_t d = 0; d < fDim; ++d)
printf("[%d]", x[d]);
printf(" = %g\n", v);
}
++x[fDim - 1];
// Adjust the bin idx
// no wrapping for dim 0 - it's what we break on!
for (Int_t d = fDim - 1; d > 0; --d) {
if (x[d] > fBins + 1) {
x[d] = 0;
++x[d - 1];
}
}
++idx;
} // while next bin
} else {
for (Long64_t i = 0; i < fSparse->GetNbins(); ++i) {
Double_t v = fSparse->GetBinContent(i, x);
Double_t checkx = 0.;
for (Int_t d = 0; d < fDim; ++d)
checkx += x[d];
check += checkx * v;
if (fgDebug > 1) {
printf("sparse%d", fDim);
for (Int_t d = 0; d < fDim; ++d)
printf("[%d]", x[d]);
printf(" = %g\n", v);
}
}
}
check /= fNum;
if (fgDebug > 0)
printf("check %s%d = %g\n", hist == kHist ? (fDim < 4 ? "hist" : "arr") : "sparse", fDim, check);
return check;
}
void sparsehist() {
// Exclude this macro also for Cling as this script requires exception support
// which is not supported in Cling as of v6.00/00.
#if defined (__CLING__)
printf("Please run this script in compiled mode by running \".x sparsehist.C+\"\n");
return;
#endif
TH2F* htime[TTimeHists::kNumHist][TTimeHists::kNumTime];
for (int h = 0; h < TTimeHists::kNumHist; ++h)
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
TString name("htime_");
if (h == 0) name += "arr";
else name += "sp";
if (t == 0) name += "_r";
TString title;
title.Form("Throughput (fill,get) %s (%s, 1M entries/sec);dim;bins;1M entries/sec", h == 0 ? "TH1/2/3/nF" : "THnSparseF", t == 0 ? "real" : "CPU");
htime[h][t] = new TH2F(name, title, 6, 0.5, 6.5, 10, 5, 105);
}
TH2F* hsparse_mem = new TH2F("hsparse_mem", "Fractional memory usage;dim;bins;fraction of memory used", 6, 0.5, 6.5, 10, 5, 105);
TH2F* hsparse_bins = new TH2F("hsparse_bins", "Fractional number of used bins;dim;bins;fraction of filled bins", 6, 0.5, 6.5, 10, 5, 105);
// TTimeHists::SetDebug(2);
Double_t max = -1.;
for (Int_t dim = 1; dim < 7; ++dim) {
printf("Processing dimension %d", dim);
for (Int_t bins = 10; bins <= 100; bins += 10) {
TTimeHists timer(dim, bins, /*num*/ 1000);
timer.Run();
for (int h = 0; h < TTimeHists::kNumHist; ++h) {
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
Double_t time = timer.GetTime((TTimeHists::EHist)h, (TTimeHists::ETime)t);
if (time >= 0.)
htime[h][t]->Fill(dim, bins, time);
}
}
hsparse_mem->Fill(dim, bins, timer.GetSparse()->GetSparseFractionMem());
hsparse_bins->Fill(dim, bins, timer.GetSparse()->GetSparseFractionBins());
if (max < timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal))
max = timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal);
printf(".");
fflush(stdout);
}
printf(" done\n");
}
Double_t markersize = 2.5;
hsparse_mem->SetMarkerSize(markersize);
hsparse_bins->SetMarkerSize(markersize);
TH2F* htime_ratio[TTimeHists::kNumTime];
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
const char* name = t ? "htime_ratio" : "htime_ratio_r";
htime_ratio[t] = (TH2F*) htime[TTimeHists::kSparse][t]->Clone(name);
TString title;
title.Form("Relative speed improvement (%s, 1M entries/sec): sparse/hist;dim;bins;#Delta 1M entries/sec", t == 0 ? "real" : "CPU");
htime_ratio[t]->SetTitle(title);
htime_ratio[t]->Divide(htime[TTimeHists::kHist][t]);
htime_ratio[t]->SetMinimum(0.1);
htime_ratio[t]->SetMarkerSize(markersize);
}
TFile* f = new TFile("sparsehist.root","RECREATE");
TCanvas* canv= new TCanvas("c","c");
canv->Divide(3,3);
const char* opt = "TEXT COL";
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
for (int h = 0; h < TTimeHists::kNumHist; ++h) {
htime[h][t]->SetMaximum(max);
htime[h][t]->SetMarkerSize(markersize);
canv->cd(1 + h + 3 * t);
htime[h][t]->Draw(opt);
htime[h][t]->Write();
}
canv->cd(3 + t * 3);
htime_ratio[t]->Draw(opt); gPad->SetLogz();
htime_ratio[t]->Write();
}
canv->cd(7); hsparse_mem->Draw(opt);
canv->cd(8); hsparse_bins->Draw(opt);
hsparse_mem->Write();
hsparse_bins->Write();
canv->Write();
delete f;
}
Author
Axel.Naumann

Definition in file sparsehist.C.

n
const Int_t n
Definition: legend1.C:16
TRandom::Gaus
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:263
f
#define f(i)
Definition: RSha256.hxx:122
TStyle::SetPaintTextFormat
void SetPaintTextFormat(const char *format="g")
Definition: TStyle.h:369
TH1::SetMinimum
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:396
MemInfo_t::fMemFree
Int_t fMemFree
Definition: TSystem.h:182
TH2F
2-D histogram with a float per channel (see TH1 documentation)}
Definition: TH2.h:251
TH1::Divide
virtual Bool_t Divide(TF1 *f1, Double_t c1=1)
Performs the operation: this = this/(c1*f1) if errors are defined (see TH1::Sumw2),...
Definition: TH1.cxx:2752
xmax
float xmax
Definition: THbookFile.cxx:95
TStopwatch.h
Long64_t
long long Long64_t
Definition: RtypesCore.h:73
TTimer::GetTime
TTime GetTime() const
Definition: TTimer.h:82
TRandom.h
Float_t
float Float_t
Definition: RtypesCore.h:57
TStyle.h
Int_t
int Int_t
Definition: RtypesCore.h:45
x
Double_t x[n]
Definition: legend1.C:17
TPad::Divide
virtual void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0)
Automatic pad generation by division.
Definition: TPad.cxx:1166
TCanvas.h
THn.h
TString
Definition: TString.h:136
v
@ v
Definition: rootcling_impl.cxx:3635
TFile.h
THnF
THnT< Float_t > THnF
Definition: THn.h:240
THnSparse
Definition: THnSparse.h:36
TH1::SetTitle
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
Definition: TH1.cxx:6344
TStopwatch::RealTime
Double_t RealTime()
Stop the stopwatch (if it is running) and return the realtime (in seconds) passed between the start a...
Definition: TStopwatch.cxx:110
TString::Form
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2289
TH1::GetBinContent
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4906
gStyle
R__EXTERN TStyle * gStyle
Definition: TStyle.h:412
TSystem.h
xmin
float xmin
Definition: THbookFile.cxx:95
h
#define h(i)
Definition: RSha256.hxx:124
TCanvas::cd
TVirtualPad * cd(Int_t subpadnumber=0)
Set current canvas & pad.
Definition: TCanvas.cxx:704
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:92
MemInfo_t
Definition: TSystem.h:179
Long_t
long Long_t
Definition: RtypesCore.h:54
TH1::GetBin
virtual Int_t GetBin(Int_t binx, Int_t biny=0, Int_t binz=0) const
Return Global bin number corresponding to binx,y,z.
Definition: TH1.cxx:4808
TH1::SetMaximum
virtual void SetMaximum(Double_t maximum=-1111)
Definition: TH1.h:395
TH1::Fill
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3274
gRandom
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
TRandom::SetSeed
virtual void SetSeed(ULong_t seed=0)
Set the random generator seed.
Definition: TRandom.cxx:597
TStopwatch::Start
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Definition: TStopwatch.cxx:58
THn
Definition: THn.h:30
TH2.h
TStopwatch::CpuTime
Double_t CpuTime()
Stop the stopwatch (if it is running) and return the cputime (in seconds) passed between the start an...
Definition: TStopwatch.cxx:125
TStyle::SetOptStat
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1592
TFile
Definition: TFile.h:54
TH2::Fill
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:294
TH3.h
TH3F
3-D histogram with a float per channel (see TH1 documentation)}
Definition: TH3.h:267
gSystem
R__EXTERN TSystem * gSystem
Definition: TSystem.h:559
THnSparse.h
Double_t
double Double_t
Definition: RtypesCore.h:59
TStyle::SetPalette
void SetPalette(Int_t ncolors=kBird, Int_t *colors=0, Float_t alpha=1.)
See TColor::SetPalette.
Definition: TStyle.cxx:1785
TCanvas
Definition: TCanvas.h:23
TH1F
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:572
TObject::Write
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:795
TStopwatch
Definition: TStopwatch.h:28
THnSparseF
THnSparseT< TArrayF > THnSparseF
Definition: THnSparse.h:220
TH1
Definition: TH1.h:57
name
char name[80]
Definition: TGX11.cxx:110
d
#define d(i)
Definition: RSha256.hxx:120
gPad
#define gPad
Definition: TVirtualPad.h:287
TStopwatch::Stop
void Stop()
Stop the stopwatch.
Definition: TStopwatch.cxx:77
TH1.h
TSystem::GetMemInfo
virtual int GetMemInfo(MemInfo_t *info) const
Returns ram and swap memory usage info into the MemInfo_t structure.
Definition: TSystem.cxx:2517
TH1::Draw
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2997