Logo ROOT   6.10/09
Reference Guide
kdTreeBinning.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_math
3 /// \notebook
4 /// kdTreeBinning tutorial: bin the data in cells of equal content using a kd-tree
5 ///
6 /// Using TKDTree wrapper class as a data binning structure
7 /// Plot the 2D data using the TH2Poly class
8 ///
9 /// \macro_image
10 /// \macro_output
11 /// \macro_code
12 ///
13 /// \author Bartolomeu Rabacal
14 
15 #include <math.h>
16 
17 #include "TKDTreeBinning.h"
18 #include "TH2D.h"
19 #include "TH2Poly.h"
20 #include "TStyle.h"
21 #include "TGraph2D.h"
22 #include "TRandom3.h"
23 #include "TCanvas.h"
24 #include <iostream>
25 
26 void kdTreeBinning() {
27 
28  // -----------------------------------------------------------------------------------------------
29  // Create random sample with regular binning plotting
30 
31  const UInt_t DATASZ = 10000;
32  const UInt_t DATADIM = 2;
33  const UInt_t NBINS = 50;
34 
35  Double_t smp[DATASZ * DATADIM];
36 
37  double mu[2] = {0,2};
38  double sig[2] = {2,3};
39  TRandom3 r;
40  r.SetSeed(1);
41  for (UInt_t i = 0; i < DATADIM; ++i)
42  for (UInt_t j = 0; j < DATASZ; ++j)
43  smp[DATASZ * i + j] = r.Gaus(mu[i], sig[i]);
44 
45  UInt_t h1bins = (UInt_t) sqrt(NBINS);
46 
47  TH2D* h1 = new TH2D("h1BinTest", "Regular binning", h1bins, -5., 5., h1bins, -5., 5.);
48  for (UInt_t j = 0; j < DATASZ; ++j)
49  h1->Fill(smp[j], smp[DATASZ + j]);
50 
51 
52  // ---------------------------------------------------------------------------------------------
53  // Create KDTreeBinning object with TH2Poly plotting
54 
55  TKDTreeBinning* kdBins = new TKDTreeBinning(DATASZ, DATADIM, smp, NBINS);
56 
57  UInt_t nbins = kdBins->GetNBins();
58  UInt_t dim = kdBins->GetDim();
59 
60  const Double_t* binsMinEdges = kdBins->GetBinsMinEdges();
61  const Double_t* binsMaxEdges = kdBins->GetBinsMaxEdges();
62 
63  TH2Poly* h2pol = new TH2Poly("h2PolyBinTest", "KDTree binning", kdBins->GetDataMin(0), kdBins->GetDataMax(0), kdBins->GetDataMin(1), kdBins->GetDataMax(1));
64 
65  for (UInt_t i = 0; i < nbins; ++i) {
66  UInt_t edgeDim = i * dim;
67  h2pol->AddBin(binsMinEdges[edgeDim], binsMinEdges[edgeDim + 1], binsMaxEdges[edgeDim], binsMaxEdges[edgeDim + 1]);
68  }
69 
70  for (UInt_t i = 1; i <= kdBins->GetNBins(); ++i)
71  h2pol->SetBinContent(i, kdBins->GetBinDensity(i - 1));
72 
73  std::cout << "Bin with minimum density: " << kdBins->GetBinMinDensity() << std::endl;
74  std::cout << "Bin with maximum density: " << kdBins->GetBinMaxDensity() << std::endl;
75 
76  TCanvas* c1 = new TCanvas("glc1", "TH2Poly from a kdTree",0,0,600,800);
77  c1->Divide(1,3);
78  c1->cd(1);
79  h1->Draw("lego");
80 
81  c1->cd(2);
82  h2pol->Draw("COLZ L");
83  c1->Update();
84 
85  //-------------------------------------------------
86  // Draw an equivalent plot showing the data points
87 
88 
89  std::vector<Double_t> z = std::vector<Double_t>(DATASZ, 0.);
90  for (UInt_t i = 0; i < DATASZ; ++i)
91  z[i] = (Double_t) h2pol->GetBinContent(h2pol->FindBin(smp[i], smp[DATASZ + i]));
92 
93  TGraph2D *g = new TGraph2D(DATASZ, smp, &smp[DATASZ], &z[0]);
94  g->SetMarkerStyle(20);
95 
96  c1->cd(3);
97  g->Draw("pcol");
98  c1->Update();
99 
100  // ---------------------------------------------------------
101  // make a new TH2Poly where bins are ordered by the density
102 
103  TH2Poly* h2polrebin = new TH2Poly("h2PolyBinTest", "KDTree binning", kdBins->GetDataMin(0), kdBins->GetDataMax(0), kdBins->GetDataMin(1), kdBins->GetDataMax(1));
104  h2polrebin->SetFloat();
105 
106  //---------------------------------
107  // Sort the bins by their density
108 
109  kdBins->SortBinsByDensity();
110 
111  for (UInt_t i = 0; i < kdBins->GetNBins(); ++i) {
112  const Double_t* binMinEdges = kdBins->GetBinMinEdges(i);
113  const Double_t* binMaxEdges = kdBins->GetBinMaxEdges(i);
114  h2polrebin->AddBin(binMinEdges[0], binMinEdges[1], binMaxEdges[0], binMaxEdges[1]);
115  }
116 
117  for (UInt_t i = 1; i <= kdBins->GetNBins(); ++i){
118  h2polrebin->SetBinContent(i, kdBins->GetBinDensity(i - 1));}
119 
120  std::cout << "Bin with minimum density: " << kdBins->GetBinMinDensity() << std::endl;
121  std::cout << "Bin with maximum density: " << kdBins->GetBinMaxDensity() << std::endl;
122 
123  // now make a vector with bin number vs position
124  for (UInt_t i = 0; i < DATASZ; ++i)
125  z[i] = (Double_t) h2polrebin->FindBin(smp[i], smp[DATASZ + i]);
126 
127  TGraph2D *g2 = new TGraph2D(DATASZ, smp, &smp[DATASZ], &z[0]);
128  g2->SetMarkerStyle(20);
129 
130 
131  // plot new TH2Poly (ordered one) and TGraph2D
132  // The new TH2Poly has to be same as old one and the TGraph2D should be similar to
133  // the previous one. It is now made using as z value the bin number
134 
135  TCanvas* c4 = new TCanvas("glc4", "TH2Poly from a kdTree (Ordered)",50,50,800,800);
136 
137  c4->Divide(2,2);
138  c4->cd(1);
139  h2polrebin->Draw("COLZ L"); // draw as scatter plot
140 
141  c4->cd(2);
142  g2->Draw("pcol");
143 
144  c4->Update();
145 
146  // make also the 1D binned histograms
147 
148  TKDTreeBinning* kdX = new TKDTreeBinning(DATASZ, 1, &smp[0], 20);
149  TKDTreeBinning* kdY = new TKDTreeBinning(DATASZ, 1, &smp[DATASZ], 40);
150 
151 
152  kdX->SortOneDimBinEdges();
153  kdY->SortOneDimBinEdges();
154 
155  TH1* hX=new TH1F("hX", "X projection", kdX->GetNBins(), kdX->GetOneDimBinEdges());
156  for(int i=0; i<kdX->GetNBins(); ++i){
157  hX->SetBinContent(i+1, kdX->GetBinDensity(i));
158  }
159 
160  TH1* hY=new TH1F("hY", "Y Projection", kdY->GetNBins(), kdY->GetOneDimBinEdges());
161  for(int i=0; i<kdY->GetNBins(); ++i){
162  hY->SetBinContent(i+1, kdY->GetBinDensity(i));
163  }
164 
165  c4->cd(3);
166  hX->Draw();
167  c4->cd(4);
168  hY->Draw();
169 }
Random number generator class based on M.
Definition: TRandom3.h:27
Int_t FindBin(Double_t x, Double_t y, Double_t z=0)
Returns the bin number of the bin at the given coordinate.
Definition: TH2Poly.cxx:561
const Double_t * GetBinsMinEdges() const
Returns an array with all bins&#39; minimum edges The edges are arranges as xmin_1,ymin_1, xmin_2,ymin_2,....xmin_{nbin},ymin_{nbin}.
virtual void Draw(Option_t *option="")
Specific drawing options can be used to paint a TGraph2D:
Definition: TGraph2D.cxx:704
Double_t GetDataMax(UInt_t dim) const
Returns the maximum of the data in the dim coordinate. &#39;dim&#39; is between 0 and fDim - 1...
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:235
return c1
Definition: legend1.C:41
UInt_t GetBinMinDensity() const
Return the bin with minimum density.
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:311
const Double_t * GetBinsMaxEdges() const
Returns an array with all bins&#39; maximum edges The edges are arranges as xmax_1,ymax_1, xmax_2,ymax_2,....xmax_{nbin},ymax_{nbin}.
TVirtualPad * cd(Int_t subpadnumber=0)
Set current canvas & pad.
Definition: TCanvas.cxx:679
virtual void SetSeed(ULong_t seed=0)
Set the random generator sequence if seed is 0 (default value) a TUUID is generated and used to fill ...
Definition: TRandom3.cxx:201
UInt_t GetNBins() const
Returns the number of bins.
int nbins[3]
double sqrt(double)
virtual Int_t AddBin(TObject *poly)
Adds a new bin to the histogram.
Definition: TH2Poly.cxx:214
const Double_t * SortOneDimBinEdges(Bool_t sortAsc=kTRUE)
Sort the one-dimensional bin edges and retuns a pointer to them.
<- TKDTreeBinning - A class providing multidimensional binning ->
TH1F * h1
Definition: legend1.C:5
Double_t GetDataMin(UInt_t dim) const
Returns the minimum of the data in the dim coordinate. &#39;dim&#39; is between 0 and fDim - 1...
virtual Double_t GetBinContent(Int_t bin) const
Returns the content of the input bin For the overflow/underflow/sea bins: -1 | -2 | -3 ---+----+---- ...
Definition: TH2Poly.cxx:761
Double_t GetBinDensity(UInt_t bin) const
Returns the density in bin.
TRandom2 r(17)
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2851
void SetBinContent(Int_t bin, Double_t content)
Sets the contents of the input bin to the input content Negative values between -1 and -9 are for the...
Definition: TH2Poly.cxx:1247
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8325
const Double_t * GetBinMinEdges(UInt_t bin) const
Returns the bin&#39;s minimum edges. &#39;bin&#39; is between 0 and fNBins - 1.
unsigned int UInt_t
Definition: RtypesCore.h:42
virtual void SetMarkerStyle(Style_t mstyle=1)
Set the marker style.
Definition: TAttMarker.h:40
void SetFloat(Bool_t flag=true)
When set to kTRUE, allows the histogram to expand if a bin outside the limits is added.
Definition: TH2Poly.cxx:1262
void SortBinsByDensity(Bool_t sortAsc=kTRUE)
Sorts bins by their density.
The Canvas class.
Definition: TCanvas.h:31
double Double_t
Definition: RtypesCore.h:55
UInt_t GetBinMaxDensity() const
Return the bin with maximum density.
The TH1 histogram class.
Definition: TH1.h:56
THist< 2, double, THistStatContent, THistStatUncertainty > TH2D
Definition: THist.hxx:316
you should not use this method at all Int_t Int_t z
Definition: TRolke.cxx:630
const Double_t * GetBinMaxEdges(UInt_t bin) const
Returns the bin&#39;s maximum edges. &#39;bin&#39; is between 0 and fNBins - 1.
virtual void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0)
Automatic pad generation by division.
Definition: TPad.cxx:1135
virtual void Update()
Update canvas pad buffers.
Definition: TCanvas.cxx:2208
UInt_t GetDim() const
Returns the number of dimensions.
Graphics object made of three arrays X, Y and Z with the same number of points each.
Definition: TGraph2D.h:40
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
2D Histogram with Polygonal Bins
Definition: TH2Poly.h:66
tomato 2-D histogram with a double per channel (see TH1 documentation)}
Definition: TH2.h:290
const Double_t * GetOneDimBinEdges() const
Returns a pointer to the vector of the bin edges for one dimensional binning only.