 ROOT   Reference Guide TGraphQQ.cxx
Go to the documentation of this file.
1 // @(#)root/graf:$Id$
2 // Author: Anna Kreshuk 18/11/2005
3
4 /*************************************************************************
5  * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. * 9 * For the list of contributors see$ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11
12 #include "TGraphQQ.h"
13 #include "TAxis.h"
14 #include "TF1.h"
15 #include "TMath.h"
16
18
19 /** \class TGraphQQ
20 \ingroup BasicGraphics
21
22 This class allows to draw quantile-quantile plots
23
24 Plots can be drawn for 2 datasets or for a dataset and a theoretical
25 distribution function
26
27 ## 2 datasets:
28  Quantile-quantile plots are used to determine whether 2 samples come from
29  the same distribution.
30  A qq-plot draws the quantiles of one dataset against the quantile of the
31  the other. The quantiles of the dataset with fewer entries are on Y axis,
32  with more entries - on X axis.
33  A straight line, going through 0.25 and 0.75 quantiles is also plotted
34  for reference. It represents a robust linear fit, not sensitive to the
35  extremes of the datasets.
36  If the datasets come from the same distribution, points of the plot should
37  fall approximately on the 45 degrees line. If they have the same
38  distribution function, but location or scale different parameters,
39  they should still fall on the straight line, but not the 45 degrees one.
40  The greater their departure from the straight line, the more evidence there
41  is, that the datasets come from different distributions.
42  The advantage of qq-plot is that it not only shows that the underlying
43  distributions are different, but, unlike the analytical methods, it also
44  gives information on the nature of this difference: heavier tails,
45  different location/scale, different shape, etc.
46
47  Some examples of qqplots of 2 datasets:
48
49 \image html graf_graphqq1.png
50
51 ## 1 dataset:
52  Quantile-quantile plots are used to determine if the dataset comes from the
53  specified theoretical distribution, such as normal.
54  A qq-plot draws quantiles of the dataset against quantiles of the specified
55  theoretical distribution.
56  (NOTE, that density, not CDF should be specified)
57  A straight line, going through 0.25 and 0.75 quantiles can also be plotted
58  for reference. It represents a robust linear fit, not sensitive to the
59  extremes of the dataset.
60  As in the 2 datasets case, departures from straight line indicate departures
61  from the specified distribution.
62
63  "The correlation coefficient associated with the linear fit to the data
64  in the probability plot (qq plot in our case) is a measure of the
65  goodness of the fit.
66  Estimates of the location and scale parameters of the distribution
67  are given by the intercept and slope. Probability plots can be generated
68  for several competing distributions to see which provides the best fit,
69  and the probability plot generating the highest correlation coefficient
70  is the best choice since it generates the straightest probability plot."
71
72  From "Engineering statistic handbook",
73
74  http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
75
76  Example of a qq-plot of a dataset from N(3, 2) distribution and
77  TMath::Gaus(0, 1) theoretical function. Fitting parameters
78  are estimates of the distribution mean and sigma.
79
80 \image html graf_graphqq2.png
81
82 References:
83
84 http://www.itl.nist.gov/div898/handbook/eda/section3/qqplot.htm
85
86 http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
87 */
88
89 ////////////////////////////////////////////////////////////////////////////////
90 /// default constructor
91
93 {
94  fF = 0;
95  fY0 = 0;
96  fNy0 = 0;
97  fXq1 = 0.;
98  fXq2 = 0.;
99  fYq1 = 0.;
100  fYq2 = 0.;
101
102 }
103
104 ////////////////////////////////////////////////////////////////////////////////
105 /// Creates a quantile-quantile plot of dataset x.
106 /// Theoretical distribution function can be defined later by SetFunction method
107
109  : TGraph(n)
110 {
111  fNy0 = 0;
112  fXq1 = 0.;
113  fXq2 = 0.;
114  fYq1 = 0.;
115  fYq2 = 0.;
116
117  Int_t *index = new Int_t[n];
118  TMath::Sort(n, x, index, kFALSE);
119  for (Int_t i=0; i<fNpoints; i++)
120  fY[i] = x[index[i]];
121  fF=0;
122  fY0=0;
123  delete [] index;
124 }
125
126 ////////////////////////////////////////////////////////////////////////////////
127 /// Creates a quantile-quantile plot of dataset x against function f
128
130  : TGraph(n)
131 {
132  fNy0 = 0;
133
134  Int_t *index = new Int_t[n];
135  TMath::Sort(n, x, index, kFALSE);
136  for (Int_t i=0; i<fNpoints; i++)
137  fY[i] = x[index[i]];
138  delete [] index;
139  fF = f;
140  fY0=0;
142 }
143
144 ////////////////////////////////////////////////////////////////////////////////
145 /// Creates a quantile-quantile plot of dataset x against dataset y
146 /// Parameters nx and ny are respective array sizes
147
149 {
150  fNy0 = 0;
151  fXq1 = 0.;
152  fXq2 = 0.;
153  fYq1 = 0.;
154  fYq2 = 0.;
155  fF = 0;
156  fY0 = 0;
157
158  nx<=ny ? fNpoints=nx : fNpoints=ny;
159
160  if (!CtorAllocate()) return;
161
162  Int_t *index = new Int_t[TMath::Max(nx, ny)];
163  TMath::Sort(nx, x, index, kFALSE);
164  if (nx <=ny){
165  for (Int_t i=0; i<fNpoints; i++)
166  fY[i] = x[index[i]];
167  TMath::Sort(ny, y, index, kFALSE);
168  if (nx==ny){
169  for (Int_t i=0; i<fNpoints; i++)
170  fX[i] = y[index[i]];
171  fY0 = 0;
172  Quartiles();
173  } else {
174  fNy0 = ny;
175  fY0 = new Double_t[ny];
176  for (Int_t i=0; i<ny; i++)
177  fY0[i] = y[i];
178  MakeQuantiles();
179  }
180  } else {
181  fNy0 = nx;
182  fY0 = new Double_t[nx];
183  for (Int_t i=0; i<nx; i++)
184  fY0[i] = x[index[i]];
185  TMath::Sort(ny, y, index, kFALSE);
186  for (Int_t i=0; i<ny; i++)
187  fY[i] = y[index[i]];
188  MakeQuantiles();
189  }
190
191
192  delete [] index;
193 }
194
195 ////////////////////////////////////////////////////////////////////////////////
196 /// Destroys a TGraphQQ
197
199 {
200  if (fY0)
201  delete [] fY0;
202  if (fF)
203  fF = 0;
204 }
205
206 ////////////////////////////////////////////////////////////////////////////////
207 /// Computes quantiles of theoretical distribution function
208
210 {
211  if (!fF) return;
212  TString s = fF->GetTitle();
213  Double_t pk;
214  if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
215  //use plotting positions optimal for normal distribution
216  for (Int_t k=1; k<=fNpoints; k++){
217  pk = (k-0.375)/(fNpoints+0.25);
218  fX[k-1]=TMath::NormQuantile(pk);
219  }
220  } else {
221  Double_t *prob = new Double_t[fNpoints];
222  if (fNpoints > 10){
223  for (Int_t k=1; k<=fNpoints; k++)
224  prob[k-1] = (k-0.5)/fNpoints;
225  } else {
226  for (Int_t k=1; k<=fNpoints; k++)
227  prob[k-1] = (k-0.375)/(fNpoints+0.25);
228  }
229  //fF->GetQuantiles(fNpoints, prob, fX);
230  fF->GetQuantiles(fNpoints, fX, prob);
231  delete [] prob;
232  }
233
234  Quartiles();
235 }
236
237 ////////////////////////////////////////////////////////////////////////////////
238 /// When sample sizes are not equal, computes quantiles of the bigger sample
239 /// by linear interpolation
240
242 {
243
244
245  if (!fY0) return;
246
247  Double_t pi, pfrac;
248  Int_t pint;
249  for (Int_t i=0; i<fNpoints-1; i++){
250  pi = (fNy0-1)*Double_t(i)/Double_t(fNpoints-1);
251  pint = TMath::FloorNint(pi);
252  pfrac = pi - pint;
253  fX[i] = (1-pfrac)*fY0[pint]+pfrac*fY0[pint+1];
254  }
255  fX[fNpoints-1]=fY0[fNy0-1];
256
257  Quartiles();
258 }
259
260 ////////////////////////////////////////////////////////////////////////////////
261 /// compute quartiles
262 /// a quartile is a 25 per cent or 75 per cent quantile
263
265 {
266  Double_t prob[]={0.25, 0.75};
267  Double_t x;
268  Double_t y;
269  TMath::Quantiles(fNpoints, 2, fY, y, prob, kTRUE);
270  if (fY0)
271  TMath::Quantiles(fNy0, 2, fY0, x, prob, kTRUE);
272  else if (fF) {
273  TString s = fF->GetTitle();
274  if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
275  x = TMath::NormQuantile(0.25);
276  x = TMath::NormQuantile(0.75);
277  } else
278  fF->GetQuantiles(2, x, prob);
279  }
280  else
281  TMath::Quantiles(fNpoints, 2, fX, x, prob, kTRUE);
282
283  fXq1=x; fXq2=x; fYq1=y; fYq2=y;
284 }
285
286 ////////////////////////////////////////////////////////////////////////////////
287 ///Sets the theoretical distribution function (density!)
288 ///and computes its quantiles
289
291 {
292  fF = f;
294 }
TGraphQQ::fNy0
Int_t fNy0
size of the fY0 dataset
Definition: TGraphQQ.h:20
n
const Int_t n
Definition: legend1.C:16
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:95
f
#define f(i)
Definition: RSha256.hxx:104
TGraphQQ::MakeQuantiles
void MakeQuantiles()
When sample sizes are not equal, computes quantiles of the bigger sample by linear interpolation.
Definition: TGraphQQ.cxx:241
TMath::Max
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
TGraphQQ::MakeFunctionQuantiles
void MakeFunctionQuantiles()
Computes quantiles of theoretical distribution function.
Definition: TGraphQQ.cxx:209
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
TNamed::GetTitle
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:48
TGraphQQ::fY0
Double_t * fY0
! second dataset, if specified
Definition: TGraphQQ.h:25
TGeant4Unit::s
static constexpr double s
Definition: TGeant4SystemOfUnits.h:162
TGraph::fX
Double_t * fX
[fNpoints] array of X points
Definition: TGraph.h:47
x
Double_t x[n]
Definition: legend1.C:17
TGraphQQ::SetFunction
void SetFunction(TF1 *f)
Sets the theoretical distribution function (density!) and computes its quantiles.
Definition: TGraphQQ.cxx:290
TMath::Sort
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
Definition: TMathBase.h:362
TString
Basic string class.
Definition: TString.h:136
TGraphQQ::fYq2
Double_t fYq2
y2 coordinate of the interquartile line
Definition: TGraphQQ.h:24
TGraphQQ::fXq2
Double_t fXq2
x2 coordinate of the interquartile line
Definition: TGraphQQ.h:22
TMath::FloorNint
Int_t FloorNint(Double_t x)
Definition: TMath.h:707
TMath::NormQuantile
Double_t NormQuantile(Double_t p)
Computes quantiles for standard normal distribution N(0, 1) at probability p.
Definition: TMath.cxx:2416
TGraphQQ.h
TGraphQQ::TGraphQQ
TGraphQQ()
default constructor
Definition: TGraphQQ.cxx:92
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:96
TGraph::fY
Double_t * fY
[fNpoints] array of Y points
Definition: TGraph.h:48
y
Double_t y[n]
Definition: legend1.C:17
TGraph::CtorAllocate
Bool_t CtorAllocate()
In constructors set fNpoints than call this method.
Definition: TGraph.cxx:743
TGraphQQ::~TGraphQQ
virtual ~TGraphQQ()
Destroys a TGraphQQ.
Definition: TGraphQQ.cxx:198
TGraphQQ::fYq1
Double_t fYq1
y1 coordinate of the interquartile line
Definition: TGraphQQ.h:23
TGraphQQ::Quartiles
void Quartiles()
compute quartiles a quartile is a 25 per cent or 75 per cent quantile
Definition: TGraphQQ.cxx:264
Double_t
double Double_t
Definition: RtypesCore.h:59
TGraph
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
TF1.h
TGeant4Unit::pi
static constexpr double pi
Definition: TGeant4SystemOfUnits.h:67
TGraphQQ::fF
TF1 * fF
theoretical density function, if specified
Definition: TGraphQQ.h:26
TAxis.h
TGraphQQ
This class allows to draw quantile-quantile plots.
Definition: TGraphQQ.h:18
TGraph::fNpoints
Int_t fNpoints
Number of points <= fMaxSize.
Definition: TGraph.h:46
TF1
1-Dim function class
Definition: TF1.h:213
TF1::GetQuantiles
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum)
Compute Quantiles for density distribution of this function.
Definition: TF1.cxx:1982
TGraphQQ::fXq1
Double_t fXq1
x1 coordinate of the interquartile line
Definition: TGraphQQ.h:21
TMath::Quantiles
void Quantiles(Int_t n, Int_t nprob, Double_t *x, Double_t *quantiles, Double_t *prob, Bool_t isSorted=kTRUE, Int_t *index=0, Int_t type=7)
Computes sample quantiles, corresponding to the given probabilities.
Definition: TMath.cxx:1183
TMath.h
int