Logo ROOT   6.14/05
Reference Guide
TGraphQQ.cxx
Go to the documentation of this file.
1 // @(#)root/graf:$Id$
2 // Author: Anna Kreshuk 18/11/2005
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #include "TGraphQQ.h"
13 #include "TAxis.h"
14 #include "TF1.h"
15 #include "TMath.h"
16 #include "TVirtualPad.h"
17 #include "TLine.h"
18 
20 
21 /** \class TGraphQQ
22 \ingroup BasicGraphics
23 
24 This class draws quantile-quantile (Q-Q) plots.
25 
26 Plots can be drawn for 2 datasets or for a dataset and a theoretical
27 distribution function
28 
29 ## 2 datasets:
30  Quantile-quantile plots are used to determine whether 2 samples come from
31  the same distribution.
32  A qq-plot draws the quantiles of one dataset against the quantiles of
33  the other. The quantiles of the dataset with fewer entries are on the Y axis,
34  those of the dataset with more entries are on the X axis.
35  A straight line, going through 0.25 and 0.75 quantiles is also plotted
36  for reference. It represents a robust linear fit, not sensitive to the
37  extremes of the datasets.
38  If the datasets come from the same distribution, points of the plot should
39  fall approximately on the 45 degrees line. If they have the same
40  distribution function, but different location or scale parameters,
41  they should still fall on a straight line, but not the 45 degrees one.
42  The greater their departure from the straight line, the more evidence there
43  is, that the datasets come from different distributions.
44  The advantage of qq-plot is that it not only shows that the underlying
45  distributions are different, but, unlike the analytical methods, it also
46  gives information on the nature of this difference: heavier tails,
47  different location/scale, different shape, etc.
48 
49  Some examples of qqplots of 2 datasets:
50 
51 \image html graf_graphqq1.png
52 
53 ## 1 dataset:
54  Quantile-quantile plots are used to determine if the dataset comes from the
55  specified theoretical distribution, such as normal.
56  A qq-plot draws quantiles of the dataset against quantiles of the specified
57  theoretical distribution.
58  (NOTE: the density, not the CDF, should be specified.)
59  A straight line, going through 0.25 and 0.75 quantiles can also be plotted
60  for reference. It represents a robust linear fit, not sensitive to the
61  extremes of the dataset.
62  As in the 2 datasets case, departures from straight line indicate departures
63  from the specified distribution.
64 
65  "The correlation coefficient associated with the linear fit to the data
66  in the probability plot (qq plot in our case) is a measure of the
67  goodness of the fit.
68  Estimates of the location and scale parameters of the distribution
69  are given by the intercept and slope. Probability plots can be generated
70  for several competing distributions to see which provides the best fit,
71  and the probability plot generating the highest correlation coefficient
72  is the best choice since it generates the straightest probability plot."
73 
74  From the "Engineering Statistics Handbook",
75 
76  http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
77 
78  Example of a qq-plot of a dataset from N(3, 2) distribution and
79  TMath::Gaus(0, 1) theoretical function. Fitting parameters
80  are estimates of the distribution mean and sigma.
81 
82 \image html graf_graphqq2.png
83 
84 References:
85 
86 http://www.itl.nist.gov/div898/handbook/eda/section3/qqplot.htm
87 
88 http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
89 */
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// default constructor
93 
95 {
96  fF = 0;
97  fY0 = 0;
98  fNy0 = 0;
99  fXq1 = 0.;
100  fXq2 = 0.;
101  fYq1 = 0.;
102  fYq2 = 0.;
103 
104 }
105 
106 ////////////////////////////////////////////////////////////////////////////////
107 /// Creates a quantile-quantile plot of dataset x.
108 /// Theoretical distribution function can be defined later by SetFunction method
109 
111  : TGraph(n)
112 {
113  fNy0 = 0;
114  fXq1 = 0.;
115  fXq2 = 0.;
116  fYq1 = 0.;
117  fYq2 = 0.;
118 
119  Int_t *index = new Int_t[n];
120  TMath::Sort(n, x, index, kFALSE);
121  for (Int_t i=0; i<fNpoints; i++)
122  fY[i] = x[index[i]];
123  fF=0;
124  fY0=0;
125  delete [] index;
126 }
127 
128 ////////////////////////////////////////////////////////////////////////////////
129 /// Creates a quantile-quantile plot of dataset x against function f
130 
132  : TGraph(n)
133 {
134  fNy0 = 0;
135 
136  Int_t *index = new Int_t[n];
137  TMath::Sort(n, x, index, kFALSE);
138  for (Int_t i=0; i<fNpoints; i++)
139  fY[i] = x[index[i]];
140  delete [] index;
141  fF = f;
142  fY0=0;
144 }
145 
146 ////////////////////////////////////////////////////////////////////////////////
147 /// Creates a quantile-quantile plot of dataset x against dataset y
148 /// Parameters nx and ny are respective array sizes
149 
151 {
152  fNy0 = 0;
153  fXq1 = 0.;
154  fXq2 = 0.;
155  fYq1 = 0.;
156  fYq2 = 0.;
157  fF = 0;
158  fY0 = 0;
159 
160  nx<=ny ? fNpoints=nx : fNpoints=ny;
161 
162  if (!CtorAllocate()) return;
163 
164  Int_t *index = new Int_t[TMath::Max(nx, ny)];
165  TMath::Sort(nx, x, index, kFALSE);
166  if (nx <=ny){
167  for (Int_t i=0; i<fNpoints; i++)
168  fY[i] = x[index[i]];
169  TMath::Sort(ny, y, index, kFALSE);
170  if (nx==ny){
171  for (Int_t i=0; i<fNpoints; i++)
172  fX[i] = y[index[i]];
173  fY0 = 0;
174  Quartiles();
175  } else {
176  fNy0 = ny;
177  fY0 = new Double_t[ny];
178  for (Int_t i=0; i<ny; i++)
179  fY0[i] = y[i];
180  MakeQuantiles();
181  }
182  } else {
183  fNy0 = nx;
184  fY0 = new Double_t[nx];
185  for (Int_t i=0; i<nx; i++)
186  fY0[i] = x[index[i]];
187  TMath::Sort(ny, y, index, kFALSE);
188  for (Int_t i=0; i<ny; i++)
189  fY[i] = y[index[i]];
190  MakeQuantiles();
191  }
192 
193 
194  delete [] index;
195 }
196 
197 ////////////////////////////////////////////////////////////////////////////////
198 /// Destroys a TGraphQQ
199 
201 {
202  if (fY0)
203  delete [] fY0;
204  if (fF)
205  fF = 0;
206 }
207 
208 ////////////////////////////////////////////////////////////////////////////////
209 /// Computes quantiles of theoretical distribution function
210 
212 {
213  if (!fF) return;
214  TString s = fF->GetTitle();
215  Double_t pk;
216  if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
217  //use plotting positions optimal for normal distribution
218  for (Int_t k=1; k<=fNpoints; k++){
219  pk = (k-0.375)/(fNpoints+0.25);
220  fX[k-1]=TMath::NormQuantile(pk);
221  }
222  } else {
223  Double_t *prob = new Double_t[fNpoints];
224  if (fNpoints > 10){
225  for (Int_t k=1; k<=fNpoints; k++)
226  prob[k-1] = (k-0.5)/fNpoints;
227  } else {
228  for (Int_t k=1; k<=fNpoints; k++)
229  prob[k-1] = (k-0.375)/(fNpoints+0.25);
230  }
231  //fF->GetQuantiles(fNpoints, prob, fX);
232  fF->GetQuantiles(fNpoints, fX, prob);
233  delete [] prob;
234  }
235 
236  Quartiles();
237 }
238 
239 ////////////////////////////////////////////////////////////////////////////////
240 /// When sample sizes are not equal, computes quantiles of the bigger sample
241 /// by linear interpolation
242 
244 {
245 
246 
247  if (!fY0) return;
248 
249  Double_t pi, pfrac;
250  Int_t pint;
251  for (Int_t i=0; i<fNpoints-1; i++){
252  pi = (fNy0-1)*Double_t(i)/Double_t(fNpoints-1);
253  pint = TMath::FloorNint(pi);
254  pfrac = pi - pint;
255  fX[i] = (1-pfrac)*fY0[pint]+pfrac*fY0[pint+1];
256  }
257  fX[fNpoints-1]=fY0[fNy0-1];
258 
259  Quartiles();
260 }
261 
262 ////////////////////////////////////////////////////////////////////////////////
263 /// compute quartiles
264 /// a quartile is a 25 per cent or 75 per cent quantile
265 
267 {
268  Double_t prob[]={0.25, 0.75};
269  Double_t x[2];
270  Double_t y[2];
271  TMath::Quantiles(fNpoints, 2, fY, y, prob, kTRUE);
272  if (fY0)
273  TMath::Quantiles(fNy0, 2, fY0, x, prob, kTRUE);
274  else if (fF) {
275  TString s = fF->GetTitle();
276  if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
277  x[0] = TMath::NormQuantile(0.25);
278  x[1] = TMath::NormQuantile(0.75);
279  } else
280  fF->GetQuantiles(2, x, prob);
281  }
282  else
283  TMath::Quantiles(fNpoints, 2, fX, x, prob, kTRUE);
284 
285  fXq1=x[0]; fXq2=x[1]; fYq1=y[0]; fYq2=y[1];
286 }
287 
288 ////////////////////////////////////////////////////////////////////////////////
289 ///Sets the theoretical distribution function (density!)
290 ///and computes its quantiles
291 
293 {
294  fF = f;
296 }
void MakeQuantiles()
When sample sizes are not equal, computes quantiles of the bigger sample by linear interpolation...
Definition: TGraphQQ.cxx:243
Int_t fNpoints
Number of points <= fMaxSize.
Definition: TGraph.h:46
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum)
Compute Quantiles for density distribution of this function.
Definition: TF1.cxx:1880
Double_t * fX
[fNpoints] array of X points
Definition: TGraph.h:47
Int_t fNy0
size of the fY0 dataset
Definition: TGraphQQ.h:20
static constexpr double pi
void SetFunction(TF1 *f)
Sets the theoretical distribution function (density!) and computes its quantiles. ...
Definition: TGraphQQ.cxx:292
Double_t NormQuantile(Double_t p)
Double_t fXq1
x1 coordinate of the interquartile line
Definition: TGraphQQ.h:21
void MakeFunctionQuantiles()
Computes quantiles of theoretical distribution function.
Definition: TGraphQQ.cxx:211
Basic string class.
Definition: TString.h:131
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
Int_t FloorNint(Double_t x)
Definition: TMath.h:706
This class allows to draw quantile-quantile plots.
Definition: TGraphQQ.h:18
Double_t fXq2
x2 coordinate of the interquartile line
Definition: TGraphQQ.h:22
Double_t x[n]
Definition: legend1.C:17
void Quantiles(Int_t n, Int_t nprob, Double_t *x, Double_t *quantiles, Double_t *prob, Bool_t isSorted=kTRUE, Int_t *index=0, Int_t type=7)
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
Sort the n elements of the array a of generic templated type Element.
Definition: TMath.h:1282
TGraphQQ()
default constructor
Definition: TGraphQQ.cxx:94
Double_t * fY0
! second dataset, if specified
Definition: TGraphQQ.h:25
Bool_t CtorAllocate()
In constructors set fNpoints than call this method.
Definition: TGraph.cxx:719
Double_t fYq2
y2 coordinate of the interquartile line
Definition: TGraphQQ.h:24
virtual ~TGraphQQ()
Destroys a TGraphQQ.
Definition: TGraphQQ.cxx:200
const Bool_t kFALSE
Definition: RtypesCore.h:88
#define ClassImp(name)
Definition: Rtypes.h:359
double Double_t
Definition: RtypesCore.h:55
Double_t y[n]
Definition: legend1.C:17
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
static constexpr double s
Double_t * fY
[fNpoints] array of Y points
Definition: TGraph.h:48
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
1-Dim function class
Definition: TF1.h:211
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Double_t fYq1
y1 coordinate of the interquartile line
Definition: TGraphQQ.h:23
void Quartiles()
compute quartiles a quartile is a 25 per cent or 75 per cent quantile
Definition: TGraphQQ.cxx:266
const Bool_t kTRUE
Definition: RtypesCore.h:87
const Int_t n
Definition: legend1.C:16
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:48
TF1 * fF
theoretical density function, if specified
Definition: TGraphQQ.h:26