Logo ROOT  
Reference Guide
ModulekNN.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Rustem Ospanov
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : ModulekNN *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Module for k-nearest neighbor algorithm *
12 * *
13 * Author: *
14 * Rustem Ospanov <rustem@fnal.gov> - U. of Texas at Austin, USA *
15 * *
16 * Copyright (c) 2007: *
17 * CERN, Switzerland *
18 * MPI-K Heidelberg, Germany *
19 * U. of Texas at Austin, USA *
20 * *
21 * Redistribution and use in source and binary forms, with or without *
22 * modification, are permitted according to the terms listed in LICENSE *
23 * (http://tmva.sourceforge.net/LICENSE) *
24 **********************************************************************************/
25
26#ifndef ROOT_TMVA_ModulekNN
27#define ROOT_TMVA_ModulekNN
28
29//______________________________________________________________________
30/*
31 kNN::Event describes a point in the input-variable vector space, with
32 additional functionality such as computing the distance between points
33*/
34//______________________________________________________________________
35
36
37// C++
38#include <iosfwd>
39#include <map>
40#include <string>
41#include <vector>
42
43// ROOT
44#include "Rtypes.h"
45#include "TRandom3.h"
46#include "ThreadLocalStorage.h"
47#include "TMVA/NodekNN.h"
48
49namespace TMVA {
50
51 class MsgLogger;
52
53 namespace kNN {
54
55 typedef Float_t VarType; // scalar type used for a single variable or target value
56 typedef std::vector<VarType> VarVec; // sequence of VarType values; Event stores its variables and its targets in this type
57
58 class Event { // a point in the input-variable space: variable values, regression targets, a weight and a type
59 public:
60
61 Event(); // default constructor (defined outside this header)
62 Event(const VarVec &vec, Double_t weight, Short_t type); // presumably stores vec, weight and type as the event's variables, weight and type -- confirm in ModulekNN.cxx
63 Event(const VarVec &vec, Double_t weight, Short_t type, const VarVec &tvec); // as above; tvec is presumably the regression targets -- confirm in ModulekNN.cxx
64 ~Event(); // destructor (defined outside this header)
65
66 Double_t GetWeight() const; // event weight (fWeight)
67
68 VarType GetVar(UInt_t i) const; // i-th variable, fVar[i]; the index is not range-checked
69 VarType GetTgt(UInt_t i) const; // i-th target, fTgt[i]; the index is not range-checked
70
71 UInt_t GetNVar() const; // number of variables, fVar.size()
72 UInt_t GetNTgt() const; // number of targets, fTgt.size()
73
74 Short_t GetType() const; // event type (fType)
75
76 // keep these two functions separate
77 VarType GetDist(VarType var, UInt_t ivar) const; // squared difference between var and this event's ivar-th variable
78 VarType GetDist(const Event &other) const; // distance to another event; defined outside this header, exact metric not visible here
79
80 void SetTargets(const VarVec &tvec); // defined in ModulekNN.cxx; presumably replaces fTgt -- confirm
81 const VarVec& GetTargets() const; // defined in ModulekNN.cxx; presumably returns fTgt -- confirm
82 const VarVec& GetVars() const; // defined in ModulekNN.cxx; presumably returns fVar -- confirm
83
84 void Print() const; // print the event (defined in ModulekNN.cxx)
85 void Print(std::ostream& os) const; // presumably prints the event to os -- defined outside this header
86
87 private:
88
89 VarVec fVar; // coordinates (variables) for knn search
90 VarVec fTgt; // targets for regression analysis
91
92 Double_t fWeight; // event weight
93 Short_t fType; // event type ==0 or == 1, expand it to arbitrary class types?
94 };
95
96 typedef std::vector<TMVA::kNN::Event> EventVec; // container of events
97 typedef std::pair<const Node<Event> *, VarType> Elem; // a tree node paired with a VarType value -- presumably its distance to the query event; confirm in NodekNN.h
98 typedef std::list<Elem> List; // list of Elem; ModulekNN keeps its latest kNN search result in this type
99
100 std::ostream& operator<<(std::ostream& os, const Event& event); // stream-output operator for Event (defined outside this header)
101
102 class ModulekNN // module for the k-nearest-neighbour algorithm: collects events, fills the search tree and runs kNN queries
103 {
104 public:
105
106 typedef std::map<int, std::vector<Double_t> > VarMap; // int key -> vector of values; type of the fVar member
107
108 public:
109
110 ModulekNN(); // default constructor
111 ~ModulekNN(); // destructor
112
113 void Clear(); // clean up
114
115 void Add(const Event &event); // add an event to the tree
116
117 Bool_t Fill(const UShort_t odepth, UInt_t ifrac, const std::string &option = ""); // fill the tree; odepth and ifrac presumably feed Optimize() and ComputeMetric() -- confirm in ModulekNN.cxx
118
119 Bool_t Find(Event event, UInt_t nfind = 100, const std::string &option = "count") const; // search the filled tree for the nfind events closest to event
120 Bool_t Find(UInt_t nfind, const std::string &option) const; // overload without an explicit query event; how the query is chosen is not visible here -- see ModulekNN.cxx
121
122 const EventVec& GetEventVec() const; // read-only access to fEvent
123
124 const List& GetkNNList() const; // read-only access to fkNNList, the latest kNN search result
125 const Event& GetkNNEvent() const; // read-only access to fkNNEvent, the latest event used for a kNN search
126
127 const VarMap& GetVarMap() const; // read-only access to fVar
128
129 const std::map<Int_t, Double_t>& GetMetric() const; // read-only access to fVarScale
130
131 void Print() const; // print (defined in ModulekNN.cxx)
132 void Print(std::ostream &os) const; // presumably prints to os -- defined outside this header
133
134 private:
135
136 Node<Event>* Optimize(UInt_t optimize_depth); // balances the binary tree for the first optimize_depth levels
137
138 void ComputeMetric(UInt_t ifrac); // computes a scale factor for each variable (dimension)
139
140 const Event Scale(const Event &event) const; // scales each event variable so that the rms of the variables is approximately 1.0
141
142 private:
143
144 // Workaround for OSX, where static thread_local data members are not supported:
145 // the generator fgRndm is declared inside this accessor via TTHREAD_TLS_DECL_ARG instead of as a data member.
146 static TRandom3& GetRndmThreadLocal() {TTHREAD_TLS_DECL_ARG(TRandom3,fgRndm,1); return fgRndm;};
147
// NOTE(review): listing line 148 is absent from this extract; a declaration may be missing here -- check the original header.
149
150 Node<Event> *fTree; // tree of events used by the search; presumably points at the root node -- confirm in ModulekNN.cxx
151
152 std::map<Int_t, Double_t> fVarScale; // metric returned by GetMetric(); presumably the per-variable scale factors from ComputeMetric()
153
154 mutable List fkNNList; // latest result from kNN search
155 mutable Event fkNNEvent; // latest event used for kNN search
156
157 std::map<Short_t, UInt_t> fCount; // count number of events of each type
158
159 EventVec fEvent; // vector of all events used to build tree and analysis
160 VarMap fVar; // sorted map of variables in each dimension for all event types
161
162 mutable MsgLogger* fLogger; // message logger
163 MsgLogger& Log() const { return *fLogger; } // logger access; dereferences fLogger without a null check
164 };
165
166 //
167 // inlined functions for Event class
168 //
169 inline VarType Event::GetDist(const VarType var1, const UInt_t ivar) const // squared difference between var1 and this event's variable number ivar
170 {
171 const VarType var2 = GetVar(ivar); // this event's value for variable ivar (GetVar does not range-check the index)
172 return (var1 - var2) * (var1 - var2); // the square is returned as is; no square root is taken here
173 }
174 inline Double_t Event::GetWeight() const // returns the event weight stored in fWeight
175 {
176 return fWeight;
177 }
178 inline VarType Event::GetVar(const UInt_t i) const // returns variable number i of this event
179 {
180 return fVar[i]; // operator[] is used, so i is not range-checked; the caller must pass i < GetNVar()
181 }
182 inline VarType Event::GetTgt(const UInt_t i) const // returns target number i of this event
183 {
184 return fTgt[i]; // operator[] is used, so i is not range-checked; the caller must pass i < GetNTgt()
185 }
186
187 inline UInt_t Event::GetNVar() const // number of variables held by this event
188 {
189 return fVar.size(); // the vector's size is converted implicitly to UInt_t
190 }
191 inline UInt_t Event::GetNTgt() const // number of targets held by this event
192 {
193 return fTgt.size(); // the vector's size is converted implicitly to UInt_t
194 }
195 inline Short_t Event::GetType() const // returns the event type stored in fType
196 {
197 return fType;
198 }
199
200 //
201 // inline functions for ModulekNN class
202 //
203 inline const List& ModulekNN::GetkNNList() const // read-only reference to fkNNList, the latest kNN search result
204 {
205 return fkNNList; // returned by reference, no copy; fkNNList is a mutable member
206 }
207 inline const Event& ModulekNN::GetkNNEvent() const // read-only reference to fkNNEvent, the latest event used for a kNN search
208 {
209 return fkNNEvent; // returned by reference, no copy; fkNNEvent is a mutable member
210 }
211 inline const EventVec& ModulekNN::GetEventVec() const // read-only reference to fEvent, the vector of all stored events
212 {
213 return fEvent; // returned by reference, no copy
214 }
215 inline const ModulekNN::VarMap& ModulekNN::GetVarMap() const // read-only reference to fVar; return type is class-qualified because VarMap is a typedef nested in ModulekNN
216 {
217 return fVar;
218 }
219 inline const std::map<Int_t, Double_t>& ModulekNN::GetMetric() const // read-only reference to fVarScale
220 {
221 return fVarScale; // returned by reference, no copy
222 }
223
224 } // end of kNN namespace
225} // end of TMVA namespace
226
227#endif
228
unsigned short UShort_t
Definition: RtypesCore.h:38
short Short_t
Definition: RtypesCore.h:37
double Double_t
Definition: RtypesCore.h:57
float Float_t
Definition: RtypesCore.h:55
int type
Definition: TGX11.cxx:120
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Short_t GetType() const
Definition: ModulekNN.h:195
VarType GetDist(VarType var, UInt_t ivar) const
Definition: ModulekNN.h:169
VarType GetTgt(UInt_t i) const
Definition: ModulekNN.h:182
void SetTargets(const VarVec &tvec)
Definition: ModulekNN.cxx:107
const VarVec & GetTargets() const
Definition: ModulekNN.cxx:114
Double_t GetWeight() const
Definition: ModulekNN.h:174
UInt_t GetNVar() const
Definition: ModulekNN.h:187
UInt_t GetNTgt() const
Definition: ModulekNN.h:191
Event()
default constructor
Definition: ModulekNN.cxx:50
~Event()
destructor
Definition: ModulekNN.cxx:81
void Print() const
print
Definition: ModulekNN.cxx:129
Short_t fType
Definition: ModulekNN.h:93
VarType GetVar(UInt_t i) const
Definition: ModulekNN.h:178
Double_t fWeight
Definition: ModulekNN.h:92
const VarVec & GetVars() const
Definition: ModulekNN.cxx:121
std::map< Int_t, Double_t > fVarScale
Definition: ModulekNN.h:152
MsgLogger * fLogger
Definition: ModulekNN.h:162
Bool_t Fill(const UShort_t odepth, UInt_t ifrac, const std::string &option="")
fill the tree
Definition: ModulekNN.cxx:245
const VarMap & GetVarMap() const
Definition: ModulekNN.h:215
Node< Event > * Optimize(UInt_t optimize_depth)
Optimize() balances binary tree for first odepth levels for each depth we split sorted depth % dimens...
Definition: ModulekNN.cxx:449
const EventVec & GetEventVec() const
Definition: ModulekNN.h:211
static TRandom3 & GetRndmThreadLocal()
Definition: ModulekNN.h:146
std::map< int, std::vector< Double_t > > VarMap
Definition: ModulekNN.h:106
void Print() const
print
Definition: ModulekNN.cxx:662
void Clear()
clean up
Definition: ModulekNN.cxx:194
ModulekNN()
default constructor
Definition: ModulekNN.cxx:173
Bool_t Find(Event event, UInt_t nfind=100, const std::string &option="count") const
find in tree if tree has been filled then search for nfind closest events if metric (fVarScale map) is...
Definition: ModulekNN.cxx:348
const Event Scale(const Event &event) const
scale each event variable so that rms of variables is approximately 1.0 this allows comparisons of va...
Definition: ModulekNN.cxx:628
void ComputeMetric(UInt_t ifrac)
compute scale factor for each variable (dimension) so that distance is computed uniformly along each ...
Definition: ModulekNN.cxx:542
Node< Event > * fTree
Definition: ModulekNN.h:150
const Event & GetkNNEvent() const
Definition: ModulekNN.h:207
const std::map< Int_t, Double_t > & GetMetric() const
Definition: ModulekNN.h:219
~ModulekNN()
destructor
Definition: ModulekNN.cxx:183
std::map< Short_t, UInt_t > fCount
Definition: ModulekNN.h:157
MsgLogger & Log() const
Definition: ModulekNN.h:163
void Add(const Event &event)
add an event to tree
Definition: ModulekNN.cxx:212
const List & GetkNNList() const
Definition: ModulekNN.h:203
This file contains binary tree and global function template that searches tree for k-nearest neighbors...
Definition: NodekNN.h:67
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
Definition: TRandom3.h:27
std::ostream & operator<<(std::ostream &os, const Event &event)
create variable transformations