ROOT   Reference Guide
Searching...
No Matches
TGraphQQ.cxx
Go to the documentation of this file.
1// @(#)root/graf:$Id$
2// Author: Anna Kreshuk 18/11/2005
3
4/*************************************************************************
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE.                         *
9 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
10 *************************************************************************/
11
12#include "TGraphQQ.h"
13#include "TAxis.h"
14#include "TF1.h"
15#include "TMath.h"
16
18
19/** \class TGraphQQ
20\ingroup BasicGraphics
21
22This class draws quantile-quantile (Q-Q) plots.
23
24Plots can be drawn for 2 datasets or for a dataset and a theoretical
25distribution function.
26
27## 2 datasets:
28 Quantile-quantile plots are used to determine whether 2 samples come from
29 the same distribution.
30 A qq-plot draws the quantiles of one dataset against the quantiles of
31 the other. The quantiles of the dataset with fewer entries are on the Y axis,
32 those of the dataset with more entries are on the X axis.
33 A straight line going through the 0.25 and 0.75 quantiles is also plotted
34 for reference. It represents a robust linear fit, not sensitive to the
35 extremes of the datasets.
36 If the datasets come from the same distribution, points of the plot should
37 fall approximately on the 45-degree line. If they have the same
38 distribution function, but different location or scale parameters,
39 they should still fall on a straight line, but not the 45-degree one.
40 The greater their departure from the straight line, the more evidence there
41 is that the datasets come from different distributions.
42 The advantage of qq-plot is that it not only shows that the underlying
43 distributions are different, but, unlike the analytical methods, it also
44 gives information on the nature of this difference: heavier tails,
45 different location/scale, different shape, etc.
46
47 Some examples of qqplots of 2 datasets:
48
49\image html graf_graphqq1.png
50
51## 1 dataset:
52 Quantile-quantile plots are used to determine if the dataset comes from the
53 specified theoretical distribution, such as normal.
54 A qq-plot draws quantiles of the dataset against quantiles of the specified
55 theoretical distribution.
56 (NOTE: the density, not the CDF, should be specified.)
57 A straight line going through the 0.25 and 0.75 quantiles can also be plotted
58 for reference. It represents a robust linear fit, not sensitive to the
59 extremes of the dataset.
60 As in the 2 datasets case, departures from straight line indicate departures
61 from the specified distribution.
62
63 "The correlation coefficient associated with the linear fit to the data
64 in the probability plot (qq plot in our case) is a measure of the
65 goodness of the fit.
66 Estimates of the location and scale parameters of the distribution
67 are given by the intercept and slope. Probability plots can be generated
68 for several competing distributions to see which provides the best fit,
69 and the probability plot generating the highest correlation coefficient
70 is the best choice since it generates the straightest probability plot."
71
72 From the "Engineering Statistics Handbook":
73
74 http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
75
76 Example of a qq-plot of a dataset from N(3, 2) distribution and
77 TMath::Gaus(0, 1) theoretical function. Fitting parameters
78 are estimates of the distribution mean and sigma.
79
80\image html graf_graphqq2.png
81
82References:
83
84http://www.itl.nist.gov/div898/handbook/eda/section3/qqplot.htm
85
86http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
87*/
88
89////////////////////////////////////////////////////////////////////////////////
90/// default constructor
91
93{
94}
95
96////////////////////////////////////////////////////////////////////////////////
97/// Creates a quantile-quantile plot of dataset x.
98/// Theoretical distribution function can be defined later by SetFunction method
99
101 : TGraph(n)
102{
103 Int_t *index = new Int_t[n];
105 for (Int_t i=0; i<fNpoints; i++)
106 fY[i] = x[index[i]];
107 delete [] index;
108}
109
110////////////////////////////////////////////////////////////////////////////////
111/// Creates a quantile-quantile plot of dataset x against function f
112
114 : TGraph(n)
115{
116 fNy0 = 0;
117
118 Int_t *index = new Int_t[n];
120 for (Int_t i=0; i<fNpoints; i++)
121 fY[i] = x[index[i]];
122 delete [] index;
123 fF = f;
125}
126
127////////////////////////////////////////////////////////////////////////////////
128/// Creates a quantile-quantile plot of dataset x against dataset y
129/// Parameters nx and ny are respective array sizes
130
132{
133 fNpoints = (nx <= ny) ? nx : ny;
134
135 if (!CtorAllocate()) return;
136
137 Int_t *index = new Int_t[TMath::Max(nx, ny)];
138 TMath::Sort(nx, x, index, kFALSE);
139 if (nx <=ny){
140 for (Int_t i=0; i<fNpoints; i++)
141 fY[i] = x[index[i]];
142 TMath::Sort(ny, y, index, kFALSE);
143 if (nx==ny){
144 for (Int_t i=0; i<fNpoints; i++)
145 fX[i] = y[index[i]];
146 fY0 = nullptr;
147 Quartiles();
148 } else {
149 fNy0 = ny;
150 fY0 = new Double_t[ny];
151 for (Int_t i=0; i<ny; i++)
152 fY0[i] = y[i];
154 }
155 } else {
156 fNy0 = nx;
157 fY0 = new Double_t[nx];
158 for (Int_t i=0; i<nx; i++)
159 fY0[i] = x[index[i]];
160 TMath::Sort(ny, y, index, kFALSE);
161 for (Int_t i=0; i<ny; i++)
162 fY[i] = y[index[i]];
164 }
165
166 delete [] index;
167}
168
169////////////////////////////////////////////////////////////////////////////////
170/// Destroys a TGraphQQ
171
173{
174 if (fY0)
175 delete [] fY0;
176 if (fF)
177 fF = nullptr;
178}
179
180////////////////////////////////////////////////////////////////////////////////
181/// Computes quantiles of theoretical distribution function
182
184{
185 if (!fF) return;
186 TString s = fF->GetTitle();
187 Double_t pk;
188 if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
189 //use plotting positions optimal for normal distribution
190 for (Int_t k=1; k<=fNpoints; k++){
191 pk = (k-0.375)/(fNpoints+0.25);
192 fX[k-1]=TMath::NormQuantile(pk);
193 }
194 } else {
195 Double_t *prob = new Double_t[fNpoints];
196 if (fNpoints > 10){
197 for (Int_t k=1; k<=fNpoints; k++)
198 prob[k-1] = (k-0.5)/fNpoints;
199 } else {
200 for (Int_t k=1; k<=fNpoints; k++)
201 prob[k-1] = (k-0.375)/(fNpoints+0.25);
202 }
203 //fF->GetQuantiles(fNpoints, prob, fX);
204 fF->GetQuantiles(fNpoints, fX, prob);
205 delete [] prob;
206 }
207
208 Quartiles();
209}
210
211////////////////////////////////////////////////////////////////////////////////
212/// When sample sizes are not equal, computes quantiles of the bigger sample
213/// by linear interpolation
214
216{
217 if (!fY0) return;
218
219 Double_t pi, pfrac;
220 Int_t pint;
221 for (Int_t i=0; i<fNpoints-1; i++){
222 pi = (fNy0-1)*Double_t(i)/Double_t(fNpoints-1);
223 pint = TMath::FloorNint(pi);
224 pfrac = pi - pint;
225 fX[i] = (1-pfrac)*fY0[pint]+pfrac*fY0[pint+1];
226 }
227 fX[fNpoints-1]=fY0[fNy0-1];
228
229 Quartiles();
230}
231
232////////////////////////////////////////////////////////////////////////////////
233/// compute quartiles
234/// a quartile is a 25 per cent or 75 per cent quantile
235
237{
238 Double_t prob[]={0.25, 0.75};
239 Double_t x[2];
240 Double_t y[2];
241 TMath::Quantiles(fNpoints, 2, fY, y, prob, kTRUE);
242 if (fY0)
243 TMath::Quantiles(fNy0, 2, fY0, x, prob, kTRUE);
244 else if (fF) {
245 TString s = fF->GetTitle();
246 if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
247 x[0] = TMath::NormQuantile(0.25);
248 x[1] = TMath::NormQuantile(0.75);
249 } else
250 fF->GetQuantiles(2, x, prob);
251 }
252 else
253 TMath::Quantiles(fNpoints, 2, fX, x, prob, kTRUE);
254
255 fXq1=x[0]; fXq2=x[1]; fYq1=y[0]; fYq2=y[1];
256}
257
258////////////////////////////////////////////////////////////////////////////////
259///Sets the theoretical distribution function (density!)
260///and computes its quantiles
261
263{
264 fF = f;
266}
#define f(i)
Definition RSha256.hxx:104
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
double Double_t
Definition RtypesCore.h:59
constexpr Bool_t kTRUE
Definition RtypesCore.h:93
#define ClassImp(name)
Definition Rtypes.h:377
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
1-Dim function class
Definition TF1.h:233
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum)
Compute Quantiles for density distribution of this function.
Definition TF1.cxx:1994
This class allows to draw quantile-quantile plots.
Definition TGraphQQ.h:18
void Quartiles()
compute quartiles a quartile is a 25 per cent or 75 per cent quantile
Definition TGraphQQ.cxx:236
TGraphQQ()
default constructor
Definition TGraphQQ.cxx:92
TF1 * fF
theoretical density function, if specified
Definition TGraphQQ.h:26
Double_t fYq1
y1 coordinate of the interquartile line
Definition TGraphQQ.h:23
Double_t * fY0
! second dataset, if specified
Definition TGraphQQ.h:25
void SetFunction(TF1 *f)
Sets the theoretical distribution function (density!) and computes its quantiles.
Definition TGraphQQ.cxx:262
~TGraphQQ() override
Destroys a TGraphQQ.
Definition TGraphQQ.cxx:172
Double_t fXq2
x2 coordinate of the interquartile line
Definition TGraphQQ.h:22
Int_t fNy0
size of the fY0 dataset
Definition TGraphQQ.h:20
void MakeFunctionQuantiles()
Computes quantiles of theoretical distribution function.
Definition TGraphQQ.cxx:183
Double_t fXq1
x1 coordinate of the interquartile line
Definition TGraphQQ.h:21
Double_t fYq2
y2 coordinate of the interquartile line
Definition TGraphQQ.h:24
void MakeQuantiles()
When sample sizes are not equal, computes quantiles of the bigger sample by linear interpolation.
Definition TGraphQQ.cxx:215
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
Int_t fNpoints
Number of points <= fMaxSize.
Definition TGraph.h:46
Double_t * fY
[fNpoints] array of Y points
Definition TGraph.h:48
Bool_t CtorAllocate()
In constructors set fNpoints, then call this method.
Definition TGraph.cxx:805
Double_t * fX
[fNpoints] array of X points
Definition TGraph.h:47
const char * GetTitle() const override
Returns title of object.
Definition TNamed.h:48
Basic string class.
Definition TString.h:139
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:632
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:250
Int_t FloorNint(Double_t x)
Returns the nearest integer of TMath::Floor(x).
Definition TMath.h:686
Double_t NormQuantile(Double_t p)
Computes quantiles for standard normal distribution N(0, 1) at probability p.
Definition TMath.cxx:2456
void Quantiles(Int_t n, Int_t nprob, Double_t *x, Double_t *quantiles, Double_t *prob, Bool_t isSorted=kTRUE, Int_t *index=nullptr, Int_t type=7)
Computes sample quantiles, corresponding to the given probabilities.
Definition TMath.cxx:1207
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
Sort the n elements of the array a of generic templated type Element.
Definition TMathBase.h:431