// ROOT logo
// @(#)root/physics:$Id: TRobustEstimator.h 22727 2008-03-19 09:54:16Z pcanal $
// Author: Anna Kreshuk  08/10/2004


//////////////////////////////////////////////////////////////////////////////
//
//  TRobustEstimator
//
// Minimum Covariance Determinant Estimator - a Fast Algorithm
// invented by Peter J. Rousseeuw and Katrien Van Driessen:
// "A Fast Algorithm for the Minimum Covariance Determinant Estimator",
// Technometrics, August 1999, Vol. 41, No. 3
//
//////////////////////////////////////////////////////////////////////////////

#ifndef ROOT_TRobustEstimator
#define ROOT_TRobustEstimator

#include "TArrayI.h"
#include "TMatrixDSym.h"
#include "TMatrixDSymEigen.h"

// TRobustEstimator stores a data matrix (fData) together with the estimates
// derived from it: mean vector, covariance matrix and its inverse, correlation
// matrix, robust distances and the indexes of outlying observations.
// Only the trivial accessors are defined inline here; every other member
// function is defined outside this header.
class TRobustEstimator : public TObject {

protected:

   Int_t        fNvar;          //number of variables
   Int_t        fH;             //algorithm parameter, determining the subsample size
   Int_t        fN;             //number of observations

   Int_t        fVarTemp;       //number of variables already added to the data matrix
   Int_t        fVecTemp;       //number of observations already added to the data matrix

   Int_t        fExact;         //if there was an exact fit, stores the number of points on a hyperplane

   TVectorD     fMean;          //location estimate (mean values)
   TMatrixDSym  fCovariance;    //covariance matrix estimate
   TMatrixDSym  fInvcovariance; //inverse of the covariance matrix
   TMatrixDSym  fCorrelation;   //correlation matrix
   TVectorD     fRd;            //array of robust distances, size n
   TVectorD     fSd;            //array of standard deviations
   TArrayI      fOut;           //array of indexes of outliers, size <0.5*n
   TVectorD     fHyperplane;    //in case more than fH observations lie on a hyperplane
                                //the equation of this hyperplane is stored here

   TMatrixD     fData;          //the original data

   //functions needed for evaluation
   //(internal helpers; their definitions are not part of this header)

   //accumulate / reset a sums-of-squares-and-cross-products style matrix
   //NOTE(review): meaning of "sscp" inferred from the names - confirm in the implementation
   void     AddToSscp(TMatrixD &sscp, TVectorD &vec);
   void     ClearSscp(TMatrixD &sscp);

   void     Classic();
   void     Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec);
   void     Correl();

   void     CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data,
                         TMatrixD &sscp, Double_t *ndist);
   void     CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged,
                            TMatrixD &sscp, Double_t *ndist);

   Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp,
                  Double_t *ndist);

   Int_t    Exact(Double_t *ndist);
   Int_t    Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane,
                   Double_t *deti, Int_t nbest, Int_t kgroup,
                   TMatrixD &sscp, Double_t *ndist);

   Int_t    Partition(Int_t nmini, Int_t *indsubdat);
   Int_t    RDist(TMatrixD &sscp);
   void     RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat);

   Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work);

public:

   TRobustEstimator();
   TRobustEstimator(Int_t nvectors, Int_t nvariables, Int_t hh=0);
   virtual ~TRobustEstimator(){;}

   //--- filling the data matrix ---
   void    AddColumn(Double_t *col);         //adds a column to the data matrix
   void    AddRow(Double_t *row);            //adds a row to the data matrix

   //--- running the estimation ---
   void    Evaluate();
   void    EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0);

   Int_t   GetBDPoint();                     //returns the breakdown point of the algorithm

   //--- results ---
   //Each result is available in two forms: a void overload taking an output
   //argument by reference, and a const overload returning a pointer to the
   //internally stored member (owned by this object).
   void    GetCovariance(TMatrixDSym &matr);  //returns robust covariance matrix estimate
   const   TMatrixDSym* GetCovariance() const {return &fCovariance;}
   void    GetCorrelation(TMatrixDSym &matr); //returns robust correlation matrix estimate
   const   TMatrixDSym* GetCorrelation() const {return &fCorrelation;}
   void    GetHyperplane(TVectorD &vec);      //if the data lies on a hyperplane, returns this hyperplane
   const   TVectorD* GetHyperplane() const;   //if the data lies on a hyperplane, returns this hyperplane

   //returns the number of points on a hyperplane (fExact).
   //const-qualified so that it can be called on const objects, like the
   //other read-only accessors of this class.
   Int_t   GetNHyp() const {return fExact;}

   void    GetMean(TVectorD &means);                        //returns robust mean vector estimate
   const   TVectorD* GetMean() const {return &fMean;}       //returns robust mean vector estimate
   void    GetRDistances(TVectorD &rdist);                  //returns robust distances of all observations
   const   TVectorD* GetRDistances() const {return &fRd;}   //returns robust distances of all observations
   Int_t   GetNumberObservations() const {return fN;}       //returns fN, the number of observations
   Int_t   GetNvar() const {return fNvar;}                  //returns fNvar, the number of variables

   //returns an array of outlier indexes (pointer to the internal fOut member)
   const   TArrayI* GetOutliers() const {return &fOut;}
   //misspelled historical name of GetOutliers(); kept so existing callers keep compiling
   const   TArrayI* GetOuliers() const {return &fOut;}

   Int_t   GetNOut(); //returns the number of points outside the tolerance ellipsoid.
                      //ONLY those with robust distances significantly larger than the
                      //cutoff value, should be considered outliers!
   Double_t GetChiQuant(Int_t i) const;

   ClassDef(TRobustEstimator,1)  //Minimum Covariance Determinant Estimator

};


#endif

// (HTML listing line anchors TRobustEstimator.h:1 through TRobustEstimator.h:113 omitted.)