Logo ROOT  
Reference Guide
LDA.cxx
Go to the documentation of this file.
1 // $Id$
2 /**********************************************************************************
3  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
4  * Package: TMVA *
5  * Class : LDA *
6  * Web : http://tmva.sourceforge.net *
7  * *
8  * Description: *
9  * Local LDA method used by MethodKNN to compute MVA value. *
10  * This is experimental code under development. This class computes *
11  * parameters of signal and background PDFs using Gaussian approximation. *
12  * *
13  * Author: *
14  * John Alison John.Alison@cern.ch - University of Pennsylvania, USA *
15  * *
16  * Copyright (c) 2007: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * University of Pennsylvania, USA *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 /*! \class TMVA::LDA
27 \ingroup TMVA
28 
29 */
30 
31 // Local
32 #include "TMVA/LDA.h"
33 
34 // C/C++
35 #include "TDecompSVD.h"
36 #include "TMatrixF.h"
37 #include "TMath.h"
38 
39 #include "TMVA/Types.h"
40 #include "TMVA/MsgLogger.h"
41 
42 /////////////////////////////////////////////////////////////////////////////////
43 /// constructor
44 
45 TMVA::LDA::LDA( Float_t tolerence, Bool_t debug )
46  : fTolerence(tolerence),
47  fNumParams(0),
48  fSigma(0),
49  fSigmaInverse(0),
50  fDebug(debug),
51  fLogger( new MsgLogger("LDA", (debug?kINFO:kDEBUG)) )
52 {
53 }
54 
55 ////////////////////////////////////////////////////////////////////////////////
56 /// destructor
57 
59 {
60  delete fLogger;
61 }
62 
63 ////////////////////////////////////////////////////////////////////////////////
64 /// Create LDA matrix using local events found by knn method
65 
66 void TMVA::LDA::Initialize(const LDAEvents& inputSignalEvents, const LDAEvents& inputBackgroundEvents)
67 {
68  Log() << kDEBUG << "There are: " << inputSignalEvents.size() << " input signal events " << Endl;
69  Log() << kDEBUG << "There are: " << inputBackgroundEvents.size() << " input background events " << Endl;
70 
71  fNumParams = inputSignalEvents[0].size();
72 
73  UInt_t numSignalEvents = inputSignalEvents.size();
74  UInt_t numBackEvents = inputBackgroundEvents.size();
75  UInt_t numTotalEvents = numSignalEvents + numBackEvents;
76  fEventFraction[0] = (Float_t)numBackEvents/numTotalEvents;
77  fEventFraction[1] = (Float_t)numSignalEvents/numTotalEvents;
78  UInt_t K = 2;
79 
80  // get the mean of the signal and background for each parameter
81  std::vector<Float_t> m_muSignal (fNumParams,0.0);
82  std::vector<Float_t> m_muBackground (fNumParams,0.0);
83  for (UInt_t param=0; param < fNumParams; ++param) {
84  for (UInt_t eventNumber=0; eventNumber < numSignalEvents; ++eventNumber)
85  m_muSignal[param] += inputSignalEvents[eventNumber][param];
86  for (UInt_t eventNumber=0; eventNumber < numBackEvents; ++eventNumber)
87  m_muBackground[param] += inputBackgroundEvents[eventNumber][param]/numBackEvents;
88  if (numSignalEvents > 0) m_muSignal[param] /= numSignalEvents;
89  if (numBackEvents > 0 ) m_muBackground[param] /= numBackEvents;
90  }
91  fMu[0] = m_muBackground;
92  fMu[1] = m_muSignal;
93 
94  if (fDebug) {
95  Log() << kDEBUG << "the signal means" << Endl;
96  for (UInt_t param=0; param < fNumParams; ++param)
97  Log() << kDEBUG << m_muSignal[param] << Endl;
98  Log() << kDEBUG << "the background means" << Endl;
99  for (UInt_t param=0; param < inputBackgroundEvents[0].size(); ++param)
100  Log() << kDEBUG << m_muBackground[param] << Endl;
101  }
102 
103  // sigma is a sum of two symmetric matrices, one for the background and one for signal
104  // get the matrices separately (def not be the best way to do it!)
105 
106  // the signal, background, and total matrix
107  TMatrixF sigmaSignal(fNumParams, fNumParams);
108  TMatrixF sigmaBack(fNumParams, fNumParams);
109  if (fSigma!=0) delete fSigma;
110  fSigma = new TMatrixF(fNumParams, fNumParams);
111  for (UInt_t row=0; row < fNumParams; ++row) {
112  for (UInt_t col=0; col < fNumParams; ++col) {
113  sigmaSignal[row][col] = 0;
114  sigmaBack[row][col] = 0;
115  (*fSigma)[row][col] = 0;
116  }
117  }
118 
119  for (UInt_t eventNumber=0; eventNumber < numSignalEvents; ++eventNumber) {
120  for (UInt_t row=0; row < fNumParams; ++row) {
121  for (UInt_t col=0; col < fNumParams; ++col) {
122  sigmaSignal[row][col] += (inputSignalEvents[eventNumber][row] - m_muSignal[row]) * (inputSignalEvents[eventNumber][col] - m_muSignal[col] );
123  }
124  }
125  }
126 
127  for (UInt_t eventNumber=0; eventNumber < numBackEvents; ++eventNumber) {
128  for (UInt_t row=0; row < fNumParams; ++row) {
129  for (UInt_t col=0; col < fNumParams; ++col) {
130  sigmaBack[row][col] += (inputBackgroundEvents[eventNumber][row] - m_muBackground[row]) * (inputBackgroundEvents[eventNumber][col] - m_muBackground[col] );
131  }
132  }
133  }
134 
135  // the total matrix
136  *fSigma = sigmaBack + sigmaSignal;
137  *fSigma *= 1.0/(numTotalEvents - K);
138 
139  if (fDebug) {
140  Log() << "after filling sigmaSignal" <<Endl;
141  sigmaSignal.Print();
142  Log() << "after filling sigmaBack" <<Endl;
143  sigmaBack.Print();
144  Log() << "after filling total Sigma" <<Endl;
145  fSigma->Print();
146  }
147 
148  TDecompSVD solutionSVD = TDecompSVD( *fSigma );
149  TMatrixF decomposed = TMatrixF( fNumParams, fNumParams );
150  TMatrixF diag ( fNumParams, fNumParams );
151  TMatrixF uTrans( fNumParams, fNumParams );
152  TMatrixF vTrans( fNumParams, fNumParams );
153  if (solutionSVD.Decompose()) {
154  for (UInt_t i = 0; i< fNumParams; ++i) {
155  if (solutionSVD.GetSig()[i] > fTolerence)
156  diag(i,i) = solutionSVD.GetSig()[i];
157  else
158  diag(i,i) = fTolerence;
159  }
160 
161  if (fDebug) {
162  Log() << "the diagonal" <<Endl;
163  diag.Print();
164  }
165 
166  decomposed = solutionSVD.GetV();
167  decomposed *= diag;
168  decomposed *= solutionSVD.GetU();
169 
170  if (fDebug) {
171  Log() << "the decomposition " <<Endl;
172  decomposed.Print();
173  }
174 
175  *fSigmaInverse = uTrans.Transpose(solutionSVD.GetU());
176  *fSigmaInverse /= diag;
177  *fSigmaInverse *= vTrans.Transpose(solutionSVD.GetV());
178 
179  if (fDebug) {
180  Log() << "the SigmaInverse " <<Endl;
181  fSigmaInverse->Print();
182 
183  Log() << "the real " <<Endl;
184  fSigma->Invert();
185  fSigma->Print();
186 
187  Bool_t problem = false;
188  for (UInt_t i =0; i< fNumParams; ++i) {
189  for (UInt_t j =0; j< fNumParams; ++j) {
190  if (TMath::Abs((Float_t)(*fSigma)(i,j) - (Float_t)(*fSigmaInverse)(i,j)) > 0.01) {
191  Log() << "problem, i= "<< i << " j= " << j << Endl;
192  Log() << "Sigma(i,j)= "<< (*fSigma)(i,j) << " SigmaInverse(i,j)= " << (*fSigmaInverse)(i,j) <<Endl;
193  Log() << "The difference is : " << TMath::Abs((Float_t)(*fSigma)(i,j) - (Float_t)(*fSigmaInverse)(i,j)) <<Endl;
194  problem = true;
195  }
196  }
197  }
198  if (problem) Log() << kWARNING << "Problem with the inversion!" << Endl;
199 
200  }
201  }
202 }
203 
204 ////////////////////////////////////////////////////////////////////////////////
205 ///
206 /// Probability value using Gaussian approximation
207 ///
208 
209 Float_t TMVA::LDA::FSub(const std::vector<Float_t>& x, Int_t k)
210 {
211  Float_t prefactor = 1.0/(TMath::TwoPi()*TMath::Sqrt(fSigma->Determinant()));
212  std::vector<Float_t> m_transPoseTimesSigmaInverse;
213 
214  for (UInt_t j=0; j < fNumParams; ++j) {
215  Float_t m_temp = 0;
216  for (UInt_t i=0; i < fNumParams; ++i) {
217  m_temp += (x[i] - fMu[k][i]) * (*fSigmaInverse)(j,i);
218  }
219  m_transPoseTimesSigmaInverse.push_back(m_temp);
220  }
221 
222  Float_t exponent = 0.0;
223  for (UInt_t i=0; i< fNumParams; ++i) {
224  exponent += m_transPoseTimesSigmaInverse[i]*(x[i] - fMu[k][i]);
225  }
226 
227  exponent *= -0.5;
228 
229  return prefactor*TMath::Exp( exponent );
230 }
231 
232 ////////////////////////////////////////////////////////////////////////////////
233 ///
234 /// Signal probability with Gaussian approximation
235 ///
236 
237 Float_t TMVA::LDA::GetProb(const std::vector<Float_t>& x, Int_t k)
238 {
239  Float_t m_numerator = FSub(x,k)*fEventFraction[k];
240  Float_t m_denominator = FSub(x,0)*fEventFraction[0]+FSub(x,1)*fEventFraction[1];
241 
242  return m_numerator/m_denominator;
243 }
244 
245 ////////////////////////////////////////////////////////////////////////////////
246 ///
247 /// Log likelihood function with Gaussian approximation
248 ///
249 
250 Float_t TMVA::LDA::GetLogLikelihood( const std::vector<Float_t>& x, Int_t k )
251 {
252  return TMath::Log( FSub(x,k)/FSub(x,!k) ) + TMath::Log( fEventFraction[k]/fEventFraction[!k] );
253 }
TDecompSVD::GetU
const TMatrixD & GetU()
Definition: TDecompSVD.h:55
TMVA::LDA::FSub
Float_t FSub(const std::vector< Float_t > &x, Int_t k)
Probability value using Gaussian approximation.
Definition: LDA.cxx:209
TDecompSVD
Single Value Decomposition class.
Definition: TDecompSVD.h:24
ROOT::Internal::SHA256::K
static const uint32_t K[64]
Definition: RSha256.hxx:148
TObject::Print
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:552
TMVA::LDA::~LDA
~LDA()
destructor
Definition: LDA.cxx:58
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:760
TMath::Sqrt
Double_t Sqrt(Double_t x)
Definition: TMath.h:691
Float_t
float Float_t
Definition: RtypesCore.h:57
TMath::Exp
Double_t Exp(Double_t x)
Definition: TMath.h:727
TMVA::LDA::GetProb
Float_t GetProb(const std::vector< Float_t > &x, Int_t k)
Signal probability with Gaussian approximation.
Definition: LDA.cxx:237
x
Double_t x[n]
Definition: legend1.C:17
TMath::Abs
Short_t Abs(Short_t d)
Definition: TMathBase.h:120
TDecompSVD.h
TMatrixT
TMatrixT.
Definition: TMatrixT.h:39
LDAEvents
std::vector< std::vector< Float_t > > LDAEvents
Definition: LDA.h:38
TMatrixT::Transpose
TMatrixT< Element > & Transpose(const TMatrixT< Element > &source)
Transpose matrix source.
Definition: TMatrixT.cxx:1470
TDecompSVD::GetSig
const TVectorD & GetSig()
Definition: TDecompSVD.h:59
bool
TDecompSVD::GetV
const TMatrixD & GetV()
Definition: TDecompSVD.h:57
MsgLogger.h
Types.h
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
unsigned int
TMath::TwoPi
constexpr Double_t TwoPi()
Definition: TMath.h:44
TMVA::LDA::GetLogLikelihood
Float_t GetLogLikelihood(const std::vector< Float_t > &x, Int_t k)
Log likelihood function with Gaussian approximation.
Definition: LDA.cxx:250
TMVA::LDA::Initialize
void Initialize(const LDAEvents &inputSignal, const LDAEvents &inputBackground)
Create LDA matrix using local events found by knn method.
Definition: LDA.cxx:66
TMVA::MsgLogger
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
LDA.h
TMVA::LDA::LDA
LDA(Float_t tolerence=1.0e-5, Bool_t debug=false)
constructor
Definition: LDA.cxx:45
TDecompSVD::Decompose
virtual Bool_t Decompose()
SVD decomposition of matrix If the decomposition succeeds, bit kDecomposed is set ,...
Definition: TDecompSVD.cxx:123
TMatrixF
TMatrixT< Float_t > TMatrixF
Definition: TMatrixFfwd.h:22
TMatrixF.h
TMath.h
int