Logo ROOT  
Reference Guide
TGraphQQ.cxx
Go to the documentation of this file.
1// @(#)root/graf:$Id$
2// Author: Anna Kreshuk 18/11/2005
3
4/*************************************************************************
5 * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#include "TGraphQQ.h"
13#include "TAxis.h"
14#include "TF1.h"
15#include "TMath.h"
16#include "TVirtualPad.h"
17#include "TLine.h"
18
20
21/** \class TGraphQQ
22\ingroup BasicGraphics
23
24This class allows to draw quantile-quantile plots
25
26Plots can be drawn for 2 datasets or for a dataset and a theoretical
27distribution function
28
29## 2 datasets:
30 Quantile-quantile plots are used to determine whether 2 samples come from
31 the same distribution.
32 A qq-plot draws the quantiles of one dataset against the quantile of the
33 the other. The quantiles of the dataset with fewer entries are on Y axis,
34 with more entries - on X axis.
35 A straight line, going through 0.25 and 0.75 quantiles is also plotted
36 for reference. It represents a robust linear fit, not sensitive to the
37 extremes of the datasets.
38 If the datasets come from the same distribution, points of the plot should
39 fall approximately on the 45 degrees line. If they have the same
40 distribution function, but location or scale different parameters,
41 they should still fall on the straight line, but not the 45 degrees one.
42 The greater their departure from the straight line, the more evidence there
43 is, that the datasets come from different distributions.
44 The advantage of qq-plot is that it not only shows that the underlying
45 distributions are different, but, unlike the analytical methods, it also
46 gives information on the nature of this difference: heavier tails,
47 different location/scale, different shape, etc.
48
49 Some examples of qqplots of 2 datasets:
50
51\image html graf_graphqq1.png
52
53## 1 dataset:
54 Quantile-quantile plots are used to determine if the dataset comes from the
55 specified theoretical distribution, such as normal.
56 A qq-plot draws quantiles of the dataset against quantiles of the specified
57 theoretical distribution.
58 (NOTE, that density, not CDF should be specified)
59 A straight line, going through 0.25 and 0.75 quantiles can also be plotted
60 for reference. It represents a robust linear fit, not sensitive to the
61 extremes of the dataset.
62 As in the 2 datasets case, departures from straight line indicate departures
63 from the specified distribution.
64
65 "The correlation coefficient associated with the linear fit to the data
66 in the probability plot (qq plot in our case) is a measure of the
67 goodness of the fit.
68 Estimates of the location and scale parameters of the distribution
69 are given by the intercept and slope. Probability plots can be generated
70 for several competing distributions to see which provides the best fit,
71 and the probability plot generating the highest correlation coefficient
72 is the best choice since it generates the straightest probability plot."
73
74 From "Engineering statistic handbook",
75
76 http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
77
78 Example of a qq-plot of a dataset from N(3, 2) distribution and
79 TMath::Gaus(0, 1) theoretical function. Fitting parameters
80 are estimates of the distribution mean and sigma.
81
82\image html graf_graphqq2.png
83
84References:
85
86http://www.itl.nist.gov/div898/handbook/eda/section3/qqplot.htm
87
88http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
89*/
90
91////////////////////////////////////////////////////////////////////////////////
92/// default constructor
93
95{
96 fF = 0;
97 fY0 = 0;
98 fNy0 = 0;
99 fXq1 = 0.;
100 fXq2 = 0.;
101 fYq1 = 0.;
102 fYq2 = 0.;
103
104}
105
106////////////////////////////////////////////////////////////////////////////////
107/// Creates a quantile-quantile plot of dataset x.
108/// Theoretical distribution function can be defined later by SetFunction method
109
111 : TGraph(n)
112{
113 fNy0 = 0;
114 fXq1 = 0.;
115 fXq2 = 0.;
116 fYq1 = 0.;
117 fYq2 = 0.;
118
119 Int_t *index = new Int_t[n];
120 TMath::Sort(n, x, index, kFALSE);
121 for (Int_t i=0; i<fNpoints; i++)
122 fY[i] = x[index[i]];
123 fF=0;
124 fY0=0;
125 delete [] index;
126}
127
128////////////////////////////////////////////////////////////////////////////////
129/// Creates a quantile-quantile plot of dataset x against function f
130
132 : TGraph(n)
133{
134 fNy0 = 0;
135
136 Int_t *index = new Int_t[n];
137 TMath::Sort(n, x, index, kFALSE);
138 for (Int_t i=0; i<fNpoints; i++)
139 fY[i] = x[index[i]];
140 delete [] index;
141 fF = f;
142 fY0=0;
144}
145
146////////////////////////////////////////////////////////////////////////////////
147/// Creates a quantile-quantile plot of dataset x against dataset y
148/// Parameters nx and ny are respective array sizes
149
151{
152 fNy0 = 0;
153 fXq1 = 0.;
154 fXq2 = 0.;
155 fYq1 = 0.;
156 fYq2 = 0.;
157 fF = 0;
158 fY0 = 0;
159
160 nx<=ny ? fNpoints=nx : fNpoints=ny;
161
162 if (!CtorAllocate()) return;
163
164 Int_t *index = new Int_t[TMath::Max(nx, ny)];
165 TMath::Sort(nx, x, index, kFALSE);
166 if (nx <=ny){
167 for (Int_t i=0; i<fNpoints; i++)
168 fY[i] = x[index[i]];
169 TMath::Sort(ny, y, index, kFALSE);
170 if (nx==ny){
171 for (Int_t i=0; i<fNpoints; i++)
172 fX[i] = y[index[i]];
173 fY0 = 0;
174 Quartiles();
175 } else {
176 fNy0 = ny;
177 fY0 = new Double_t[ny];
178 for (Int_t i=0; i<ny; i++)
179 fY0[i] = y[i];
181 }
182 } else {
183 fNy0 = nx;
184 fY0 = new Double_t[nx];
185 for (Int_t i=0; i<nx; i++)
186 fY0[i] = x[index[i]];
187 TMath::Sort(ny, y, index, kFALSE);
188 for (Int_t i=0; i<ny; i++)
189 fY[i] = y[index[i]];
191 }
192
193
194 delete [] index;
195}
196
197////////////////////////////////////////////////////////////////////////////////
198/// Destroys a TGraphQQ
199
201{
202 if (fY0)
203 delete [] fY0;
204 if (fF)
205 fF = 0;
206}
207
208////////////////////////////////////////////////////////////////////////////////
209/// Computes quantiles of theoretical distribution function
210
212{
213 if (!fF) return;
214 TString s = fF->GetTitle();
215 Double_t pk;
216 if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
217 //use plotting positions optimal for normal distribution
218 for (Int_t k=1; k<=fNpoints; k++){
219 pk = (k-0.375)/(fNpoints+0.25);
220 fX[k-1]=TMath::NormQuantile(pk);
221 }
222 } else {
223 Double_t *prob = new Double_t[fNpoints];
224 if (fNpoints > 10){
225 for (Int_t k=1; k<=fNpoints; k++)
226 prob[k-1] = (k-0.5)/fNpoints;
227 } else {
228 for (Int_t k=1; k<=fNpoints; k++)
229 prob[k-1] = (k-0.375)/(fNpoints+0.25);
230 }
231 //fF->GetQuantiles(fNpoints, prob, fX);
232 fF->GetQuantiles(fNpoints, fX, prob);
233 delete [] prob;
234 }
235
236 Quartiles();
237}
238
239////////////////////////////////////////////////////////////////////////////////
240/// When sample sizes are not equal, computes quantiles of the bigger sample
241/// by linear interpolation
242
244{
245
246
247 if (!fY0) return;
248
249 Double_t pi, pfrac;
250 Int_t pint;
251 for (Int_t i=0; i<fNpoints-1; i++){
252 pi = (fNy0-1)*Double_t(i)/Double_t(fNpoints-1);
253 pint = TMath::FloorNint(pi);
254 pfrac = pi - pint;
255 fX[i] = (1-pfrac)*fY0[pint]+pfrac*fY0[pint+1];
256 }
257 fX[fNpoints-1]=fY0[fNy0-1];
258
259 Quartiles();
260}
261
262////////////////////////////////////////////////////////////////////////////////
263/// compute quartiles
264/// a quartile is a 25 per cent or 75 per cent quantile
265
267{
268 Double_t prob[]={0.25, 0.75};
269 Double_t x[2];
270 Double_t y[2];
271 TMath::Quantiles(fNpoints, 2, fY, y, prob, kTRUE);
272 if (fY0)
273 TMath::Quantiles(fNy0, 2, fY0, x, prob, kTRUE);
274 else if (fF) {
275 TString s = fF->GetTitle();
276 if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
277 x[0] = TMath::NormQuantile(0.25);
278 x[1] = TMath::NormQuantile(0.75);
279 } else
280 fF->GetQuantiles(2, x, prob);
281 }
282 else
283 TMath::Quantiles(fNpoints, 2, fX, x, prob, kTRUE);
284
285 fXq1=x[0]; fXq2=x[1]; fYq1=y[0]; fYq2=y[1];
286}
287
288////////////////////////////////////////////////////////////////////////////////
289///Sets the theoretical distribution function (density!)
290///and computes its quantiles
291
293{
294 fF = f;
296}
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
const Bool_t kFALSE
Definition: RtypesCore.h:88
double Double_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
1-Dim function class
Definition: TF1.h:211
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum)
Compute Quantiles for density distribution of this function.
Definition: TF1.cxx:1973
This class allows to draw quantile-quantile plots.
Definition: TGraphQQ.h:18
void Quartiles()
compute quartiles a quartile is a 25 per cent or 75 per cent quantile
Definition: TGraphQQ.cxx:266
TGraphQQ()
default constructor
Definition: TGraphQQ.cxx:94
virtual ~TGraphQQ()
Destroys a TGraphQQ.
Definition: TGraphQQ.cxx:200
TF1 * fF
theoretical density function, if specified
Definition: TGraphQQ.h:26
Double_t fYq1
y1 coordinate of the interquartile line
Definition: TGraphQQ.h:23
Double_t * fY0
! second dataset, if specified
Definition: TGraphQQ.h:25
void SetFunction(TF1 *f)
Sets the theoretical distribution function (density!) and computes its quantiles.
Definition: TGraphQQ.cxx:292
Double_t fXq2
x2 coordinate of the interquartile line
Definition: TGraphQQ.h:22
Int_t fNy0
size of the fY0 dataset
Definition: TGraphQQ.h:20
void MakeFunctionQuantiles()
Computes quantiles of theoretical distribution function.
Definition: TGraphQQ.cxx:211
Double_t fXq1
x1 coordinate of the interquartile line
Definition: TGraphQQ.h:21
Double_t fYq2
y2 coordinate of the interquartile line
Definition: TGraphQQ.h:24
void MakeQuantiles()
When sample sizes are not equal, computes quantiles of the bigger sample by linear interpolation.
Definition: TGraphQQ.cxx:243
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Int_t fNpoints
Number of points <= fMaxSize.
Definition: TGraph.h:46
Double_t * fY
[fNpoints] array of Y points
Definition: TGraph.h:48
Bool_t CtorAllocate()
In constructors set fNpoints than call this method.
Definition: TGraph.cxx:727
Double_t * fX
[fNpoints] array of X points
Definition: TGraph.h:47
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:48
Basic string class.
Definition: TString.h:131
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
static constexpr double s
static constexpr double pi
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
Int_t FloorNint(Double_t x)
Definition: TMath.h:697
Double_t NormQuantile(Double_t p)
Computes quantiles for standard normal distribution N(0, 1) at probability p.
Definition: TMath.cxx:2423
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
Definition: TMathBase.h:362
void Quantiles(Int_t n, Int_t nprob, Double_t *x, Double_t *quantiles, Double_t *prob, Bool_t isSorted=kTRUE, Int_t *index=0, Int_t type=7)
Computes sample quantiles, corresponding to the given probabilities.
Definition: TMath.cxx:1190