Logo ROOT   6.10/09
Reference Guide
TRobustEstimator.h
Go to the documentation of this file.
1 // @(#)root/physics:$Id$
2 // Author: Anna Kreshuk 08/10/2004
3 
4 
5 //////////////////////////////////////////////////////////////////////////////
6 //
7 // TRobustEstimator
8 //
9 // Minimum Covariance Determinant Estimator - a Fast Algorithm
10 // invented by Peter J.Rousseeuw and Katrien Van Dreissen
11 // "A Fast Algorithm for the Minimum covariance Determinant Estimator"
12 // Technometrics, August 1999, Vol.41, NO.3
13 //
14 //////////////////////////////////////////////////////////////////////////////
15 
16 #ifndef ROOT_TRobustEstimator
17 #define ROOT_TRobustEstimator
18 
19 #include "TArrayI.h"
20 #include "TMatrixDSym.h"
21 #include "TVectorD.h"
22 
23 class TRobustEstimator : public TObject {
24 
25 protected:
26 
27  Int_t fNvar; //number of variables
28  Int_t fH; //algorithm parameter, determining the subsample size
29  Int_t fN; //number of observations
30 
31  Int_t fVarTemp; //number of variables already added to the data matrix
32  Int_t fVecTemp; //number of observations already added to the data matrix
33 
34  Int_t fExact; //if there was an exact fit, stores the number of points on a hyperplane
35 
36  TVectorD fMean; //location estimate (mean values)
37  TMatrixDSym fCovariance; //covariance matrix estimate
38  TMatrixDSym fInvcovariance; //inverse of the covariance matrix
39  TMatrixDSym fCorrelation; //correlation matrix
40  TVectorD fRd; //array of robust distances, size n
41  TVectorD fSd; //array of standard deviations
42  TArrayI fOut; //array of indexes of ouliers, size <0.5*n
43  TVectorD fHyperplane; //in case more than fH observations lie on a hyperplane
44  //the equation of this hyperplane is stored here
45 
46  TMatrixD fData; //the original data
47 
48  //functions needed for evaluation
49 
50  void AddToSscp(TMatrixD &sscp, TVectorD &vec);
51  void ClearSscp(TMatrixD &sscp);
52 
53  void Classic();
54  void Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec);
55  void Correl();
56 
57  void CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data,
58  TMatrixD &sscp, Double_t *ndist);
59  void CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist);
60 
61  Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist);
62 
63  Int_t Exact(Double_t *ndist);
64  Int_t Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane,
65  Double_t *deti, Int_t nbest,Int_t kgroup,
66  TMatrixD &sscp, Double_t *ndist);
67 
68  Int_t Partition(Int_t nmini, Int_t *indsubdat);
69  Int_t RDist(TMatrixD &sscp);
70  void RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat);
71 
72  Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work);
73 
74 public:
75 
77  TRobustEstimator(Int_t nvectors, Int_t nvariables, Int_t hh=0);
78  virtual ~TRobustEstimator(){;}
79 
80  void AddColumn(Double_t *col); //adds a column to the data matrix
81  void AddRow(Double_t *row); //adds a row to the data matrix
82 
83  void Evaluate();
84  void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0);
85 
86  Int_t GetBDPoint(); //returns the breakdown point of the algorithm
87 
88  /// returns a reference to the data matrix
89  const TMatrixD & GetData() { return fData; }
90 
91  void GetCovariance(TMatrixDSym &matr); //returns robust covariance matrix estimate
92  const TMatrixDSym* GetCovariance() const{return &fCovariance;}
93  void GetCorrelation(TMatrixDSym &matr); //returns robust correlation matrix estimate
94  const TMatrixDSym* GetCorrelation() const{return &fCorrelation;}
95  void GetHyperplane(TVectorD &vec); //if the data lies on a hyperplane, returns this hyperplane
96  const TVectorD* GetHyperplane() const; //if the data lies on a hyperplane, returns this hyperplane
97  Int_t GetNHyp() {return fExact;} //returns the number of points on a hyperplane
98  void GetMean(TVectorD &means); //returns robust mean vector estimate
99  const TVectorD* GetMean() const {return &fMean;} //returns robust mean vector estimate
100  void GetRDistances(TVectorD &rdist); //returns robust distances of all observations
101  const TVectorD* GetRDistances() const {return &fRd;} //returns robust distances of all observations
102  Int_t GetNumberObservations() const {return fN;}
103  Int_t GetNvar() const {return fNvar;}
104  const TArrayI* GetOuliers() const{return &fOut;} //returns an array of outlier indexes
105  Int_t GetNOut(); //returns the number of points outside the tolerance ellipsoid.
106  //ONLY those with robust distances significantly larger than the
107  //cutoff value, should be considered outliers!
108  Double_t GetChiQuant(Int_t i) const;
109 
110  ClassDef(TRobustEstimator,1) //Minimum Covariance Determinant Estimator
111 
112 };
113 
114 
115 #endif
116 
void Classic()
called when h=n.
Int_t GetNOut()
returns the number of outliers
void CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist)
creates a subset of hmerged vectors with smallest orthogonal distances to the hyperplane hyp[1]*(x1-m...
void CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
creates a subset of htotal elements from ntotal elements first, p+1 elements are drawn randomly(witho...
virtual ~TRobustEstimator()
Int_t GetNumberObservations() const
void AddColumn(Double_t *col)
adds a column to the data matrix it is assumed that the column has size fN variable fVarTemp keeps th...
void Evaluate()
Finds the estimate of multivariate mean and variance.
Int_t Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane, Double_t *deti, Int_t nbest, Int_t kgroup, TMatrixD &sscp, Double_t *ndist)
This function is called if determinant of the covariance matrix of a subset=0.
void ClearSscp(TMatrixD &sscp)
clear the sscp matrix, used for covariance and mean calculation
Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
from the input htotal-subset constructs another htotal subset with lower determinant ...
int Int_t
Definition: RtypesCore.h:41
Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work)
because I need an Int_t work array
void AddRow(Double_t *row)
adds a vector to the data matrix it is supposed that the vector is of size fNvar
const TMatrixDSym * GetCovariance() const
Array of integers (32 bits per element).
Definition: TArrayI.h:27
const TVectorD * GetMean() const
TMatrixDSym fInvcovariance
#define ClassDef(name, id)
Definition: Rtypes.h:297
Minimum Covariance Determinant Estimator - a Fast Algorithm invented by Peter J.Rousseeuw and Katrien...
Double_t GetChiQuant(Int_t i) const
returns the chi2 quantiles
const Double_t sigma
void RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat)
Draws ngroup nonoverlapping subdatasets out of a dataset of size n such that the selected case number...
TMarker * m
Definition: textangle.C:8
void AddToSscp(TMatrixD &sscp, TVectorD &vec)
update the sscp matrix with vector vec
const TVectorD * GetRDistances() const
Int_t Exact(Double_t *ndist)
for the exact fit situations returns number of observations on the hyperplane
Int_t Partition(Int_t nmini, Int_t *indsubdat)
divides the elements into approximately equal subgroups number of elements in each subgroup is stored...
TMatrixDSym fCovariance
const TMatrixD & GetData()
returns a reference to the data matrix
const TVectorD * GetHyperplane() const
if the points are on a hyperplane, returns this hyperplane
const TMatrixDSym * GetCorrelation() const
double Double_t
Definition: RtypesCore.h:55
Int_t RDist(TMatrixD &sscp)
Calculates robust distances.Then the samples with robust distances greater than a cutoff value (0...
void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0)
for the univariate case estimates of location and scatter are returned in mean and sigma parameters t...
Int_t GetBDPoint()
returns the breakdown point of the algorithm
Mother of all ROOT objects.
Definition: TObject.h:37
void Correl()
transforms covariance matrix into correlation matrix
TMatrixDSym fCorrelation
Int_t GetNvar() const
void Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec)
calculates mean and covariance
TRobustEstimator()
this constructor should be used in a univariate case: first call this constructor, then - the EvaluateUni(..) function
const TArrayI * GetOuliers() const