Logo ROOT  
Reference Guide
TRobustEstimator.h
Go to the documentation of this file.
1 // @(#)root/physics:$Id$
2 // Author: Anna Kreshuk 08/10/2004
3 
4 
5 //////////////////////////////////////////////////////////////////////////////
6 //
7 // TRobustEstimator
8 //
9 // Minimum Covariance Determinant Estimator - a Fast Algorithm
10 // invented by Peter J.Rousseeuw and Katrien Van Dreissen
11 // "A Fast Algorithm for the Minimum covariance Determinant Estimator"
12 // Technometrics, August 1999, Vol.41, NO.3
13 //
14 //////////////////////////////////////////////////////////////////////////////
15 
16 #ifndef ROOT_TRobustEstimator
17 #define ROOT_TRobustEstimator
18 
19 #include "TArrayI.h"
20 #include "TMatrixDSym.h"
21 #include "TVectorD.h"
22 
23 class TRobustEstimator : public TObject {
24 
25 protected:
26 
27  Int_t fNvar; //number of variables
28  Int_t fH; //algorithm parameter, determining the subsample size
29  Int_t fN; //number of observations
30 
31  Int_t fVarTemp; //number of variables already added to the data matrix
32  Int_t fVecTemp; //number of observations already added to the data matrix
33 
34  Int_t fExact; //if there was an exact fit, stores the number of points on a hyperplane
35 
36  TVectorD fMean; //location estimate (mean values)
37  TMatrixDSym fCovariance; //covariance matrix estimate
38  TMatrixDSym fInvcovariance; //inverse of the covariance matrix
39  TMatrixDSym fCorrelation; //correlation matrix
40  TVectorD fRd; //array of robust distances, size n
41  TVectorD fSd; //array of standard deviations
42  TArrayI fOut; //array of indexes of ouliers, size <0.5*n
43  TVectorD fHyperplane; //in case more than fH observations lie on a hyperplane
44  //the equation of this hyperplane is stored here
45 
46  TMatrixD fData; //the original data
47 
48  //functions needed for evaluation
49 
50  void AddToSscp(TMatrixD &sscp, TVectorD &vec);
51  void ClearSscp(TMatrixD &sscp);
52 
53  void Classic();
54  void Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec);
55  void Correl();
56 
57  void CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data,
58  TMatrixD &sscp, Double_t *ndist);
59  void CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist);
60 
61  Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist);
62 
63  Int_t Exact(Double_t *ndist);
64  Int_t Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane,
65  Double_t *deti, Int_t nbest,Int_t kgroup,
66  TMatrixD &sscp, Double_t *ndist);
67 
68  Int_t Partition(Int_t nmini, Int_t *indsubdat);
69  Int_t RDist(TMatrixD &sscp);
70  void RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat);
71 
72  Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work);
73 
74 public:
75 
77  TRobustEstimator(Int_t nvectors, Int_t nvariables, Int_t hh=0);
78  virtual ~TRobustEstimator(){;}
79 
80  void AddColumn(Double_t *col); //adds a column to the data matrix
81  void AddRow(Double_t *row); //adds a row to the data matrix
82 
83  void Evaluate();
84  void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0);
85 
86  Int_t GetBDPoint(); //returns the breakdown point of the algorithm
87 
88  /// returns a reference to the data matrix
89  const TMatrixD & GetData() { return fData; }
90 
91  void GetCovariance(TMatrixDSym &matr); //returns robust covariance matrix estimate
92  const TMatrixDSym* GetCovariance() const{return &fCovariance;}
93  void GetCorrelation(TMatrixDSym &matr); //returns robust correlation matrix estimate
94  const TMatrixDSym* GetCorrelation() const{return &fCorrelation;}
95  void GetHyperplane(TVectorD &vec); //if the data lies on a hyperplane, returns this hyperplane
96  const TVectorD* GetHyperplane() const; //if the data lies on a hyperplane, returns this hyperplane
97  Int_t GetNHyp() {return fExact;} //returns the number of points on a hyperplane
98  void GetMean(TVectorD &means); //returns robust mean vector estimate
99  const TVectorD* GetMean() const {return &fMean;} //returns robust mean vector estimate
100  void GetRDistances(TVectorD &rdist); //returns robust distances of all observations
101  const TVectorD* GetRDistances() const {return &fRd;} //returns robust distances of all observations
102  Int_t GetNumberObservations() const {return fN;}
103  Int_t GetNvar() const {return fNvar;}
104  const TArrayI* GetOuliers() const{return &fOut;} //returns an array of outlier indexes
105  Int_t GetNOut(); //returns the number of points outside the tolerance ellipsoid.
106  //ONLY those with robust distances significantly larger than the
107  //cutoff value, should be considered outliers!
108  Double_t GetChiQuant(Int_t i) const;
109 
110  ClassDef(TRobustEstimator,1) //Minimum Covariance Determinant Estimator
111 
112 };
113 
114 
115 #endif
116 
TRobustEstimator::GetData
const TMatrixD & GetData()
returns a reference to the data matrix
Definition: TRobustEstimator.h:89
m
auto * m
Definition: textangle.C:8
TRobustEstimator::fSd
TVectorD fSd
Definition: TRobustEstimator.h:41
TRobustEstimator::AddToSscp
void AddToSscp(TMatrixD &sscp, TVectorD &vec)
update the sscp matrix with vector vec
Definition: TRobustEstimator.cxx:778
TRobustEstimator::fExact
Int_t fExact
Definition: TRobustEstimator.h:34
TRobustEstimator::fVarTemp
Int_t fVarTemp
Definition: TRobustEstimator.h:31
TVectorD.h
TRobustEstimator::GetRDistances
const TVectorD * GetRDistances() const
Definition: TRobustEstimator.h:101
TRobustEstimator::fH
Int_t fH
Definition: TRobustEstimator.h:28
TRobustEstimator::GetHyperplane
const TVectorD * GetHyperplane() const
if the points are on a hyperplane, returns this hyperplane
Definition: TRobustEstimator.cxx:717
TRobustEstimator::GetBDPoint
Int_t GetBDPoint()
returns the breakdown point of the algorithm
Definition: TRobustEstimator.cxx:674
TRobustEstimator::fVecTemp
Int_t fVecTemp
Definition: TRobustEstimator.h:32
TRobustEstimator::fRd
TVectorD fRd
Definition: TRobustEstimator.h:40
TRobustEstimator::GetNvar
Int_t GetNvar() const
Definition: TRobustEstimator.h:103
Int_t
int Int_t
Definition: RtypesCore.h:45
TRobustEstimator::GetCorrelation
const TMatrixDSym * GetCorrelation() const
Definition: TRobustEstimator.h:94
TRobustEstimator::Exact
Int_t Exact(Double_t *ndist)
for the exact fit situations returns number of observations on the hyperplane
Definition: TRobustEstimator.cxx:1036
TRobustEstimator::fCorrelation
TMatrixDSym fCorrelation
Definition: TRobustEstimator.h:39
TRobustEstimator::fCovariance
TMatrixDSym fCovariance
Definition: TRobustEstimator.h:37
TMatrixTSym< Double_t >
TRobustEstimator::Covar
void Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec)
calculates mean and covariance
Definition: TRobustEstimator.cxx:826
TMatrixT< Double_t >
TRobustEstimator::GetNOut
Int_t GetNOut()
returns the number of outliers
Definition: TRobustEstimator.cxx:770
TMatrixDSym.h
TRobustEstimator::EvaluateUni
void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0)
for the univariate case estimates of location and scatter are returned in mean and sigma parameters t...
Definition: TRobustEstimator.cxx:608
TRobustEstimator::RDist
Int_t RDist(TMatrixD &sscp)
Calculates robust distances.Then the samples with robust distances greater than a cutoff value (0....
Definition: TRobustEstimator.cxx:1172
TRobustEstimator::fMean
TVectorD fMean
Definition: TRobustEstimator.h:36
TRobustEstimator::Correl
void Correl()
transforms covariance matrix into correlation matrix
Definition: TRobustEstimator.cxx:849
TRobustEstimator::~TRobustEstimator
virtual ~TRobustEstimator()
Definition: TRobustEstimator.h:78
TRobustEstimator::AddColumn
void AddColumn(Double_t *col)
adds a column to the data matrix it is assumed that the column has size fN variable fVarTemp keeps th...
Definition: TRobustEstimator.cxx:170
TRobustEstimator::GetOuliers
const TArrayI * GetOuliers() const
Definition: TRobustEstimator.h:104
TRobustEstimator::TRobustEstimator
TRobustEstimator()
this constructor should be used in a univariate case: first call this constructor,...
Definition: TRobustEstimator.cxx:124
TRobustEstimator::fOut
TArrayI fOut
Definition: TRobustEstimator.h:42
TRobustEstimator::Classic
void Classic()
called when h=n.
Definition: TRobustEstimator.cxx:808
TRobustEstimator::fN
Int_t fN
Definition: TRobustEstimator.h:29
TArrayI.h
TRobustEstimator::CreateOrtSubset
void CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist)
creates a subset of hmerged vectors with smallest orthogonal distances to the hyperplane hyp[1]*(x1-m...
Definition: TRobustEstimator.cxx:967
TRobustEstimator::CStep
Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
from the input htotal-subset constructs another htotal subset with lower determinant
Definition: TRobustEstimator.cxx:999
TRobustEstimator::Exact2
Int_t Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane, Double_t *deti, Int_t nbest, Int_t kgroup, TMatrixD &sscp, Double_t *ndist)
This function is called if determinant of the covariance matrix of a subset=0.
Definition: TRobustEstimator.cxx:1071
TRobustEstimator::GetNumberObservations
Int_t GetNumberObservations() const
Definition: TRobustEstimator.h:102
TRobustEstimator::fData
TMatrixD fData
Definition: TRobustEstimator.h:46
TRobustEstimator::Evaluate
void Evaluate()
Finds the estimate of multivariate mean and variance.
Definition: TRobustEstimator.cxx:208
sigma
const Double_t sigma
Definition: h1analysisProxy.h:11
TVectorT< Double_t >
TRobustEstimator::CreateSubset
void CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
creates a subset of htotal elements from ntotal elements first, p+1 elements are drawn randomly(witho...
Definition: TRobustEstimator.cxx:877
Double_t
double Double_t
Definition: RtypesCore.h:59
TRobustEstimator::fHyperplane
TVectorD fHyperplane
Definition: TRobustEstimator.h:43
TRobustEstimator::ClearSscp
void ClearSscp(TMatrixD &sscp)
clear the sscp matrix, used for covariance and mean calculation
Definition: TRobustEstimator.cxx:795
TObject
Mother of all ROOT objects.
Definition: TObject.h:37
ClassDef
#define ClassDef(name, id)
Definition: Rtypes.h:325
TRobustEstimator::fInvcovariance
TMatrixDSym fInvcovariance
Definition: TRobustEstimator.h:38
TRobustEstimator::GetChiQuant
Double_t GetChiQuant(Int_t i) const
returns the chi2 quantiles
Definition: TRobustEstimator.cxx:684
TRobustEstimator::GetMean
const TVectorD * GetMean() const
Definition: TRobustEstimator.h:99
TRobustEstimator
Minimum Covariance Determinant Estimator - a Fast Algorithm invented by Peter J.Rousseeuw and Katrien...
Definition: TRobustEstimator.h:23
TRobustEstimator::fNvar
Int_t fNvar
Definition: TRobustEstimator.h:27
TRobustEstimator::KOrdStat
Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work)
because I need an Int_t work array
Definition: TRobustEstimator.cxx:1267
TRobustEstimator::GetCovariance
const TMatrixDSym * GetCovariance() const
Definition: TRobustEstimator.h:92
TRobustEstimator::AddRow
void AddRow(Double_t *row)
adds a vector to the data matrix it is supposed that the vector is of size fNvar
Definition: TRobustEstimator.cxx:191
TRobustEstimator::Partition
Int_t Partition(Int_t nmini, Int_t *indsubdat)
divides the elements into approximately equal subgroups number of elements in each subgroup is stored...
Definition: TRobustEstimator.cxx:1118
TRobustEstimator::GetNHyp
Int_t GetNHyp()
Definition: TRobustEstimator.h:97
TArrayI
Array of integers (32 bits per element).
Definition: TArrayI.h:27
int
TRobustEstimator::RDraw
void RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat)
Draws ngroup nonoverlapping subdatasets out of a dataset of size n such that the selected case number...
Definition: TRobustEstimator.cxx:1235