83 const UInt_t nvar = GetNVar();
86 std::cerr <<
"Distance: two events have different dimensions" << std::endl;
91 for (
UInt_t ivar = 0; ivar < nvar; ++ivar) {
92 sum += GetDist(other.
GetVar(ivar), ivar);
132 Int_t dp = os.precision();
134 for (
UInt_t ivar = 0; ivar != GetNVar(); ++ivar) {
142 os << std::setfill(
' ') << std::setw(5) << std::setprecision(3) << GetVar(ivar);
149 os <<
" no variables";
151 os << std::setprecision(dp);
179 delete fTree; fTree = 0;
208 Log() <<
kFATAL <<
"<Add> Cannot add event: tree is already built" <<
Endl;
213 fDimn =
event.GetNVar();
215 else if (fDimn != event.
GetNVar()) {
216 Log() <<
kFATAL <<
"ModulekNN::Add() - number of dimension does not match previous events" <<
Endl;
220 fEvent.push_back(event);
222 for (
UInt_t ivar = 0; ivar < fDimn; ++ivar) {
223 fVar[ivar].push_back(event.
GetVar(ivar));
226 std::map<Short_t, UInt_t>::iterator cit = fCount.find(event.
GetType());
227 if (cit == fCount.end()) {
228 fCount[
event.GetType()] = 1;
241 Log() <<
kFATAL <<
"ModulekNN::Fill - tree has already been created" <<
Endl;
248 if (option.find(
"trim") != std::string::npos) {
249 for (std::map<Short_t, UInt_t>::const_iterator it = fCount.begin(); it != fCount.end(); ++it) {
250 if (min == 0 || min > it->second) {
255 Log() <<
kINFO <<
"<Fill> Will trim all event types to " << min <<
" events" <<
Endl;
262 for (EventVec::const_iterator event = fEvent.begin();
event != fEvent.end(); ++event) {
263 std::map<Short_t, UInt_t>::iterator cit = fCount.find(event->GetType());
264 if (cit == fCount.end()) {
265 fCount[
event->GetType()] = 1;
267 else if (cit->second < min) {
274 for (
UInt_t d = 0; d < fDimn; ++d) {
275 fVar[d].push_back(event->GetVar(d));
278 evec.push_back(*event);
281 Log() <<
kINFO <<
"<Fill> Erased " << fEvent.size() - evec.size() <<
" events" <<
Endl;
290 for (VarMap::iterator it = fVar.begin(); it != fVar.end(); ++it) {
291 std::sort((it->second).begin(), (it->second).end());
294 if (option.find(
"metric") != std::string::npos && ifrac > 0) {
295 ComputeMetric(ifrac);
299 for (VarMap::iterator it = fVar.begin(); it != fVar.end(); ++it) {
300 std::sort((it->second).begin(), (it->second).end());
311 Log() <<
kFATAL <<
"ModulekNN::Fill() - failed to create tree" <<
Endl;
315 for (EventVec::const_iterator event = fEvent.begin();
event != fEvent.end(); ++event) {
316 fTree->Add(*event, 0);
318 std::map<Short_t, UInt_t>::iterator cit = fCount.find(event->GetType());
319 if (cit == fCount.end()) {
320 fCount[
event->GetType()] = 1;
327 for (std::map<Short_t, UInt_t>::const_iterator it = fCount.begin(); it != fCount.end(); ++it) {
328 Log() <<
kINFO <<
"<Fill> Class " << it->first <<
" has " << std::setw(8)
329 << it->second <<
" events" <<
Endl;
344 Log() <<
kFATAL <<
"ModulekNN::Find() - tree has not been filled" <<
Endl;
347 if (fDimn != event.
GetNVar()) {
348 Log() <<
kFATAL <<
"ModulekNN::Find() - number of dimension does not match training events" <<
Endl;
352 Log() <<
kFATAL <<
"ModulekNN::Find() - requested 0 nearest neighbors" <<
Endl;
358 if (!fVarScale.empty()) {
359 event = Scale(event);
366 if(option.find(
"weight") != std::string::npos)
371 kNN::Find<kNN::Event>(fkNNList, fTree, event,
Double_t(nfind), 0.0);
377 kNN::Find<kNN::Event>(fkNNList, fTree, event, nfind);
388 if (fCount.empty() || !fTree) {
391 typedef std::map<Short_t, UInt_t>::const_iterator const_iterator;
392 TTHREAD_TLS_DECL_ARG(const_iterator,cit,fCount.end());
394 if (cit == fCount.end()) {
395 cit = fCount.begin();
398 const Short_t etype = (cit++)->first;
400 if (option ==
"flat") {
402 for (
UInt_t d = 0; d < fDimn; ++d) {
403 VarMap::const_iterator vit = fVar.find(d);
404 if (vit == fVar.end()) {
408 const std::vector<Double_t> &vvec = vit->second;
419 if (width < 0.0 || width > 0.0) {
420 dvec.push_back(min + width*GetRndmThreadLocal().Rndm());
427 const Event event(dvec, 1.0, etype);
444 if (fVar.empty() || fDimn != fVar.size()) {
449 const UInt_t size = (fVar.begin()->second).size();
451 Log() <<
kWARNING <<
"<Optimize> Cannot build a tree without events" <<
Endl;
455 VarMap::const_iterator it = fVar.begin();
456 for (; it != fVar.end(); ++it) {
457 if ((it->second).size() != size) {
458 Log() <<
kWARNING <<
"<Optimize> # of variables doesn't match between dimensions" <<
Endl;
463 if (
double(fDimn*size) <
TMath::Power(2.0,
double(odepth))) {
464 Log() <<
kWARNING <<
"<Optimize> Optimization depth exceeds number of events" <<
Endl;
468 Log() <<
kINFO <<
"Optimizing tree for " << fDimn <<
" variables with " << size <<
" values" <<
Endl;
470 std::vector<Node<Event> *> pvec, cvec;
473 if (it == fVar.end() || (it->second).size() < 2) {
478 const Event pevent(
VarVec(fDimn, (it->second)[size/2]), -1.0, -1);
482 pvec.push_back(tree);
484 for (
UInt_t depth = 1; depth < odepth; ++depth) {
485 const UInt_t mod = depth % fDimn;
487 VarMap::const_iterator vit = fVar.find(mod);
488 if (vit == fVar.end()) {
492 const std::vector<Double_t> &dvec = vit->second;
494 if (dvec.size() < 2) {
500 for (std::vector<
Node<Event> *>::iterator pit = pvec.begin(); pit != pvec.end(); ++pit) {
503 const VarType lmedian = dvec[size*ichild/(2*pvec.size() + 1)];
506 const VarType rmedian = dvec[size*ichild/(2*pvec.size() + 1)];
509 const Event levent(
VarVec(fDimn, lmedian), -1.0, -1);
510 const Event revent(
VarVec(fDimn, rmedian), -1.0, -1);
518 cvec.push_back(lchild);
519 cvec.push_back(rchild);
541 Log() <<
kFATAL <<
"ModulekNN::ComputeMetric - fraction can not exceed 100%" <<
Endl;
544 if (!fVarScale.empty()) {
545 Log() <<
kFATAL <<
"ModulekNN::ComputeMetric - metric is already computed" <<
Endl;
548 if (fEvent.size() < 100) {
549 Log() <<
kFATAL <<
"ModulekNN::ComputeMetric - number of events is too small" <<
Endl;
553 const UInt_t lfrac = (100 - ifrac)/2;
554 const UInt_t rfrac = 100 - (100 - ifrac)/2;
556 Log() <<
kINFO <<
"Computing scale factor for 1d distributions: "
557 <<
"(ifrac, bottom, top) = (" << ifrac <<
"%, " << lfrac <<
"%, " << rfrac <<
"%)" <<
Endl;
561 for (VarMap::const_iterator vit = fVar.begin(); vit != fVar.end(); ++vit) {
562 const std::vector<Double_t> &dvec = vit->second;
564 std::vector<Double_t>::const_iterator beg_it = dvec.end();
565 std::vector<Double_t>::const_iterator end_it = dvec.end();
568 for (std::vector<Double_t>::const_iterator dit = dvec.begin(); dit != dvec.end(); ++dit, ++
dist) {
570 if ((100*dist)/dvec.size() == lfrac && beg_it == dvec.end()) {
574 if ((100*dist)/dvec.size() == rfrac && end_it == dvec.end()) {
579 if (beg_it == dvec.end() || end_it == dvec.end()) {
580 beg_it = dvec.begin();
583 assert(beg_it != end_it &&
"Empty vector");
591 if (!(lpos < rpos)) {
592 Log() <<
kFATAL <<
"ModulekNN::ComputeMetric() - min value is greater than max value" <<
Endl;
603 fVarScale[vit->first] = rpos - lpos;
608 for (
UInt_t ievent = 0; ievent < fEvent.size(); ++ievent) {
609 fEvent[ievent] = Scale(fEvent[ievent]);
611 for (
UInt_t ivar = 0; ivar < fDimn; ++ivar) {
612 fVar[ivar].push_back(fEvent[ievent].GetVar(ivar));
623 if (fVarScale.empty()) {
627 if (event.
GetNVar() != fVarScale.size()) {
628 Log() <<
kFATAL <<
"ModulekNN::Scale() - mismatched metric and event size" <<
Endl;
634 for (
UInt_t ivar = 0; ivar <
event.GetNVar(); ++ivar) {
635 std::map<int, Double_t>::const_iterator fit = fVarScale.find(ivar);
636 if (fit == fVarScale.end()) {
637 Log() <<
kFATAL <<
"ModulekNN::Scale() - failed to find scale for " << ivar <<
Endl;
641 if (fit->second > 0.0) {
642 vvec[ivar] =
event.GetVar(ivar)/fit->second;
645 Log() <<
kFATAL <<
"Variable " << ivar <<
" has zero width" <<
Endl;
649 return Event(vvec, event.
GetWeight(),
event.GetType(),
event.GetTargets());
665 os <<
"----------------------------------------------------------------------"<< std::endl;
666 os <<
"Printing knn result" << std::endl;
667 os << fkNNEvent << std::endl;
671 std::map<Short_t, Double_t>
min,
max;
673 os <<
"Printing " << fkNNList.size() <<
" nearest neighbors" << std::endl;
674 for (List::const_iterator it = fkNNList.begin(); it != fkNNList.end(); ++it) {
675 os << ++count <<
": " << it->second <<
": " << it->first->GetEvent() << std::endl;
677 const Event &
event = it->first->GetEvent();
678 for (
UShort_t ivar = 0; ivar <
event.GetNVar(); ++ivar) {
679 if (min.find(ivar) == min.end()) {
680 min[ivar] =
event.GetVar(ivar);
682 else if (min[ivar] > event.GetVar(ivar)) {
683 min[ivar] =
event.GetVar(ivar);
686 if (max.find(ivar) == max.end()) {
687 max[ivar] =
event.GetVar(ivar);
689 else if (max[ivar] < event.GetVar(ivar)) {
690 max[ivar] =
event.GetVar(ivar);
695 if (min.size() == max.size()) {
696 for (std::map<Short_t, Double_t>::const_iterator mit = min.begin(); mit != min.end(); ++mit) {
698 Log() <<
kINFO <<
"(var, min, max) = (" << i <<
"," << min[i] <<
", " << max[i] <<
")" <<
Endl;
702 os <<
"----------------------------------------------------------------------" << std::endl;
Event()
default constructor
double dist(Rotation3D const &r1, Rotation3D const &r2)
RooCmdArg Optimize(Int_t flag=2)
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
MsgLogger & Endl(MsgLogger &ml)
ModulekNN()
default constructor
const Event Scale(const Event &event) const
scale each event variable so that rms of variables is approximately 1.0; this allows comparisons of va...
std::ostream & operator<<(std::ostream &os, const Event &event)
streamer
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
std::vector< TMVA::kNN::Event > EventVec
const VarVec & GetTargets() const
void SetTargets(const VarVec &tvec)
Bool_t Fill(const UShort_t odepth, UInt_t ifrac, const std::string &option="")
fill the tree
const VarVec & GetVars() const
void SetNodeL(Node *node)
void Add(const Event &event)
add an event to tree
Bool_t Find(Event event, UInt_t nfind=100, const std::string &option="count") const
find in tree if tree has been filled then search for nfind closest events if metric (fVarScale map) is...
Double_t GetWeight() const
void Print(std::ostream &os, const OptionType &opt)
VarType GetDist(VarType var, UInt_t ivar) const
UInt_t Find(std::list< std::pair< const Node< T > *, Float_t > > &nlist, const Node< T > *node, const T &event, UInt_t nfind)
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
VarType GetVar(UInt_t i) const
Node< Event > * Optimize(UInt_t optimize_depth)
Optimize() balances binary tree for first odepth levels for each depth we split sorted depth % dimens...
void SetNodeR(Node *node)
std::vector< VarType > VarVec
void ComputeMetric(UInt_t ifrac)
compute scale factor for each variable (dimension) so that distance is computed uniformly along each...