DataSet.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
 * Package: TMVA *
 * Class : DataSet *
 * Web : http://tmva.sourceforge.net *
 * *
 * Description: *
 * Implementation (see header for description) *
 * *
 * Authors (alphabetical): *
 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
 * *
 * Copyright (c) 2006: *
 * CERN, Switzerland *
 * MPI-K Heidelberg, Germany *
 * *
 * Redistribution and use in source and binary forms, with or without *
 * modification, are permitted according to the terms listed in LICENSE *
 * (http://tmva.sourceforge.net/LICENSE) *
 **********************************************************************************/

/*! \class TMVA::DataSet
\ingroup TMVA

Class that contains all the data information

*/
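
// Usage sketch, for orientation only: a DataSet is normally created and filled
// by the DataSetFactory, not by hand. Assuming a populated TMVA::DataSetInfo
// object `dsi` (the local names below are illustrative), per-event access via
// this class looks roughly like:
//
//    TMVA::DataSet ds(dsi);
//    ds.SetCurrentType(TMVA::Types::kTraining);
//    for (Long64_t i = 0; i < ds.GetNEvents(); ++i) {
//       const TMVA::Event* ev = ds.GetEvent(i);
//       // per-event access: ev->GetValue(ivar), ev->GetWeight(), ev->GetClass()
//    }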

#include <vector>
#include <algorithm>
#include <cstdlib>
#include <stdexcept>

#include "TMVA/DataSetInfo.h"
#include "TMVA/DataSet.h"
#include "TMVA/Event.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/ResultsRegression.h"
#include "TMVA/ResultsClassification.h"
#include "TMVA/ResultsMulticlass.h"
#include "TMVA/Configurable.h"

#include "TMVA/Types.h"
#include "TMVA/Results.h"
#include "TMVA/VariableInfo.h"

#include "TRandom3.h"

////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::DataSet::DataSet(const DataSetInfo& dsi)
   :TNamed(dsi.GetName(),"DataSet"),
   fdsi(&dsi),
   fEventCollection(4),
   fCurrentTreeIdx(0),
   fCurrentEventIdx(0),
   fHasNegativeEventWeights(kFALSE),
   fLogger( new MsgLogger(TString(TString("Dataset:")+dsi.GetName()).Data()) ),
   fTrainingBlockSize(0)
{

   fClassEvents.resize(4);
   fBlockBelongToTraining.reserve(10);
   fBlockBelongToTraining.push_back(kTRUE);

   // sampling
   fSamplingRandom = 0;

   Int_t treeNum = 2;
   fSampling.resize( treeNum );
   fSamplingNEvents.resize( treeNum );
   fSamplingWeight.resize(treeNum);

   for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
      fSampling.at(treeIdx) = kFALSE;
      fSamplingNEvents.at(treeIdx) = 0;
      fSamplingWeight.at(treeIdx) = 1.0;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::DataSet::DataSet()
   :fdsi(new DataSetInfo(GetName())),
   fEventCollection(4),
   fCurrentTreeIdx(0),
   fCurrentEventIdx(0),
   fHasNegativeEventWeights(kFALSE),
   fLogger( new MsgLogger(TString(TString("Dataset:")+GetName()).Data()) ),
   fTrainingBlockSize(0)
{

   fClassEvents.resize(4);
   fBlockBelongToTraining.reserve(10);
   fBlockBelongToTraining.push_back(kTRUE);

   // sampling
   fSamplingRandom = 0;

   Int_t treeNum = 2;
   fSampling.resize( treeNum );
   fSamplingNEvents.resize( treeNum );
   fSamplingWeight.resize(treeNum);

   for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
      fSampling.at(treeIdx) = kFALSE;
      fSamplingNEvents.at(treeIdx) = 0;
      fSamplingWeight.at(treeIdx) = 1.0;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::DataSet::~DataSet()
{
   // delete event collection
   Bool_t deleteEvents=true; // dataset owns the events /JS
   DestroyCollection( Types::kTraining, deleteEvents );
   DestroyCollection( Types::kTesting, deleteEvents );

   fBlockBelongToTraining.clear();
   // delete results
   for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); ++it) {
      for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); ++itMap) {
         delete itMap->second;
      }
   }

   // delete sampling
   if (fSamplingRandom != 0 ) delete fSamplingRandom;


   // need also to delete fEventCollections[2] and [3], not sure if they are used
   DestroyCollection( Types::kValidation, deleteEvents );
   DestroyCollection( Types::kTrainingOriginal, deleteEvents );

   delete fLogger;
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataSet::IncrementNClassEvents( Int_t type, UInt_t classNumber )
{
   if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
   if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
   fClassEvents.at( type ).at( classNumber ) += 1;
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataSet::ClearNClassEvents( Int_t type )
{
   if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
   fClassEvents.at( type ).clear();
}

////////////////////////////////////////////////////////////////////////////////

Long64_t TMVA::DataSet::GetNClassEvents( Int_t type, UInt_t classNumber )
{
   try {
      return fClassEvents.at(type).at(classNumber);
   }
   catch (std::out_of_range &) {
      ClassInfo* ci = fdsi->GetClassInfo( classNumber );
      Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "No " << (type==0?"training":(type==1?"testing":"_unknown_type_"))
            << " events for class " << (ci==NULL?"_no_name_known_":ci->GetName()) << " (index # "<<classNumber<<")"
            << " available. Check if all class names are spelled correctly and if events are"
            << " passing the selection cuts." << Endl;
   }
   catch (...) {
      Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" << Endl;
   }
   return 0;
}

////////////////////////////////////////////////////////////////////////////////
/// destroys the event collection (events + vector)

void TMVA::DataSet::DestroyCollection( Types::ETreeType type, Bool_t deleteEvents )
{
   UInt_t i = TreeIndex(type);
   if (i>=fEventCollection.size() || fEventCollection[i].size()==0) return;
   if (deleteEvents) {

      for (UInt_t j=0; j<fEventCollection[i].size(); j++) delete fEventCollection[i][j];
   }
   fEventCollection[i].clear();
}

////////////////////////////////////////////////////////////////////////////////

const TMVA::Event* TMVA::DataSet::GetEvent() const
{
   if (fSampling.size() > UInt_t(fCurrentTreeIdx) && fSampling.at(fCurrentTreeIdx)) {
      Long64_t iEvt = fSamplingSelected.at(fCurrentTreeIdx).at( fCurrentEventIdx ).second;
      return ((fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
   }
   else {
      return ((fEventCollection.at(fCurrentTreeIdx))).at(fCurrentEventIdx);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// access the number of variables through the datasetinfo

UInt_t TMVA::DataSet::GetNVariables() const
{
   return fdsi->GetNVariables();
}

////////////////////////////////////////////////////////////////////////////////
/// access the number of targets through the datasetinfo

UInt_t TMVA::DataSet::GetNTargets() const
{
   return fdsi->GetNTargets();
}

////////////////////////////////////////////////////////////////////////////////
/// access the number of spectators through the datasetinfo

UInt_t TMVA::DataSet::GetNSpectators() const
{
   return fdsi->GetNSpectators();
}

////////////////////////////////////////////////////////////////////////////////
/// add event to event list
/// after which the event is owned by the dataset

void TMVA::DataSet::AddEvent( Event * ev, Types::ETreeType type )
{
   fEventCollection.at(Int_t(type)).push_back(ev);
   if (ev->GetWeight()<0) fHasNegativeEventWeights = kTRUE;
}

////////////////////////////////////////////////////////////////////////////////
/// Sets the event collection (by DataSetFactory)

void TMVA::DataSet::SetEventCollection(std::vector<TMVA::Event*>* events, Types::ETreeType type, Bool_t deleteEvents)
{
   DestroyCollection(type,deleteEvents);

   const Int_t t = TreeIndex(type);
   ClearNClassEvents( type );
   // pointer to std::vector is not serializable,
   fEventCollection.at(t) = *events;
   for (std::vector<Event*>::iterator it = fEventCollection.at(t).begin(); it < fEventCollection.at(t).end(); ++it) {
      IncrementNClassEvents( t, (*it)->GetClass() );
   }
}
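
// Illustrative sketch of how a collection is handed over (normally done by the
// DataSetFactory; the vector below is hypothetical, and the DataSet takes
// ownership of the events it receives):
//
//    std::vector<TMVA::Event*> trainingEvents = /* filled elsewhere */ {};
//    ds.SetEventCollection(&trainingEvents, TMVA::Types::kTraining);
//    // afterwards the per-class counters are rebuilt and
//    // ds.GetNEvents(TMVA::Types::kTraining) equals trainingEvents.size()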

////////////////////////////////////////////////////////////////////////////////

TMVA::Results* TMVA::DataSet::GetResults( const TString & resultsName,
                                          Types::ETreeType type,
                                          Types::EAnalysisType analysistype )
{
   UInt_t t = TreeIndex(type);
   if (t<fResults.size()) {
      const std::map< TString, Results* >& resultsForType = fResults[t];
      std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
      if (it!=resultsForType.end()) {
         //Log() << kINFO << " GetResults("<<info<<") returns existing result." << Endl;
         return it->second;
      }
   }
   else {
      fResults.resize(t+1);
   }

   // nothing found

   Results * newresults = 0;
   switch(analysistype) {
   case Types::kClassification:
      newresults = new ResultsClassification(fdsi,resultsName);
      break;
   case Types::kRegression:
      newresults = new ResultsRegression(fdsi,resultsName);
      break;
   case Types::kMulticlass:
      newresults = new ResultsMulticlass(fdsi,resultsName);
      break;
   case Types::kNoAnalysisType:
      newresults = new ResultsClassification(fdsi,resultsName);
      break;
   case Types::kMaxAnalysisType:
      //Log() << kINFO << " GetResults("<<info<<") can't create new one." << Endl;
      return 0;
      break;
   }

   newresults->SetTreeType( type );
   fResults[t][resultsName] = newresults;

   //Log() << kINFO << " GetResults("<<info<<") builds new result." << Endl;
   return newresults;
}
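
// Illustrative sketch of how a method's result container is obtained; the
// method name "BDT" is only a placeholder. A matching Results object (here a
// ResultsClassification) is created on the first call and reused afterwards:
//
//    TMVA::Results* res = ds.GetResults("BDT", TMVA::Types::kTesting,
//                                       TMVA::Types::kClassification);
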
////////////////////////////////////////////////////////////////////////////////
/// delete the results stored for this particular Method instance.
/// (here apparently called resultsName instead of MethodTitle)
/// Tree type (Training, testing etc..)
/// Analysis Type (Classification, Multiclass, Regression etc..)

void TMVA::DataSet::DeleteResults( const TString & resultsName,
                                   Types::ETreeType type,
                                   Types::EAnalysisType /* analysistype */ )
{
   if (fResults.empty()) return;

   if (UInt_t(type) > fResults.size()){
      Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for a Treetype (training/testing/...)"
           << " whose index " << type << " does not exist " << Endl;
   }
   std::map< TString, Results* >& resultsForType = fResults[UInt_t(type)];
   std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
   if (it!=resultsForType.end()) {
      Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << " Delete Results previous existing result:" << resultsName
            << " of type " << type << Endl;
      delete it->second;
      resultsForType.erase(it->first);
   }
   else {
      Log() << kINFO << Form("Dataset[%s] : ",fdsi->GetName()) << "could not find Result class of " << resultsName
            << " of type " << type << " which I should have deleted" << Endl;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Deletes all results currently in the dataset.
///
void TMVA::DataSet::DeleteAllResults(Types::ETreeType type,
                                     Types::EAnalysisType /* analysistype */ )
{
   if (fResults.empty()) return;

   if (UInt_t(type) > fResults.size()){
      Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for a Treetype (training/testing/...)"
           << " whose index " << type << " does not exist " << Endl;
   }

   std::map<TString, Results *> & resultsForType = fResults[UInt_t(type)];

   for (auto && it : resultsForType) {
      auto & resultsName = it.first;

      Log() << kDEBUG << Form("Dataset[%s] : ", fdsi->GetName())
            << " DeleteAllResults previous existing result: "
            << resultsName << " of type " << type << Endl;

      delete it.second;
   }

   resultsForType.clear();
}

////////////////////////////////////////////////////////////////////////////////
/// divide training set

void TMVA::DataSet::DivideTrainingSet( UInt_t blockNum )
{
   Int_t tOrg = TreeIndex(Types::kTrainingOriginal),tTrn = TreeIndex(Types::kTraining);
   // not changing anything ??
   if (fBlockBelongToTraining.size() == blockNum) return;
   // storing the original training vector
   if (fBlockBelongToTraining.size() == 1) {
      if (fEventCollection[tOrg].size() == 0)
         fEventCollection[tOrg].resize(fEventCollection[tTrn].size());
      fEventCollection[tOrg].clear();
      for (UInt_t i=0; i<fEventCollection[tTrn].size(); i++)
         fEventCollection[tOrg].push_back(fEventCollection[tTrn][i]);
      fClassEvents[tOrg] = fClassEvents[tTrn];
   }
   // resetting the event division vector
   fBlockBelongToTraining.clear();
   for (UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(kTRUE);

   ApplyTrainingSetDivision();
}

////////////////////////////////////////////////////////////////////////////////
/// apply division of data set

void TMVA::DataSet::ApplyTrainingSetDivision()
{
   Int_t tOrg = TreeIndex(Types::kTrainingOriginal), tTrn = TreeIndex(Types::kTraining), tVld = TreeIndex(Types::kValidation);
   fEventCollection[tTrn].clear();
   if (fEventCollection[tVld].size()==0)
      fEventCollection[tVld].resize(fEventCollection[tOrg].size());
   fEventCollection[tVld].clear();

   // create the new event collections; events are assigned to blocks in a
   // round-robin fashion (event i belongs to block i % number-of-blocks)
   for (UInt_t i=0; i<fEventCollection[tOrg].size(); i++) {
      if (fBlockBelongToTraining[i % fBlockBelongToTraining.size()])
         fEventCollection[tTrn].push_back(fEventCollection[tOrg][i]);
      else
         fEventCollection[tVld].push_back(fEventCollection[tOrg][i]);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// move training block

void TMVA::DataSet::MoveTrainingBlock( Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges )
{
   if (dest == Types::kValidation)
      fBlockBelongToTraining[blockInd]=kFALSE;
   else
      fBlockBelongToTraining[blockInd]=kTRUE;
   if (applyChanges) ApplyTrainingSetDivision();
}
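
// Illustrative sketch of the block-division interface (the block count of 4 is
// arbitrary): the training set is split into equal round-robin blocks, and
// individual blocks can then be moved between training and validation, e.g.
// for cross-validation-style studies:
//
//    ds.DivideTrainingSet(4);                            // 4 blocks, all training
//    ds.MoveTrainingBlock(2, TMVA::Types::kValidation);  // block 2 -> validation
//    ds.MoveTrainingBlock(2, TMVA::Types::kTraining);    // and back again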

////////////////////////////////////////////////////////////////////////////////
/// return number of signal test events in dataset

Long64_t TMVA::DataSet::GetNEvtSigTest()
{
   return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Signal")->GetNumber() );
}

////////////////////////////////////////////////////////////////////////////////
/// return number of background test events in dataset

Long64_t TMVA::DataSet::GetNEvtBkgdTest()
{
   return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Background")->GetNumber() );
}

////////////////////////////////////////////////////////////////////////////////
/// return number of signal training events in dataset

Long64_t TMVA::DataSet::GetNEvtSigTrain()
{
   return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Signal")->GetNumber() );
}

////////////////////////////////////////////////////////////////////////////////
/// return number of background training events in dataset

Long64_t TMVA::DataSet::GetNEvtBkgdTrain()
{
   return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Background")->GetNumber() );
}

////////////////////////////////////////////////////////////////////////////////
/// initialize random or importance sampling

void TMVA::DataSet::InitSampling( Float_t fraction, Float_t weight, UInt_t seed )
{
   // add a random generator if not yet present
   if (fSamplingRandom == 0 ) fSamplingRandom = new TRandom3( seed );

   // first, clear the lists
   std::vector< std::pair< Float_t, Long64_t >* > evtList;

   Int_t treeIdx = TreeIndex( GetCurrentType() );

   if (fSamplingEventList.size() < UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
   if (fSamplingSelected.size() < UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);

   fSamplingEventList.at(treeIdx).clear();
   fSamplingSelected.at(treeIdx).clear();

   if (fSampling.size() < UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
   if (fSamplingNEvents.size() < UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
   if (fSamplingWeight.size() < UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);

   if (fraction > 0.999999 || fraction < 0.0000001) {
      fSampling.at( treeIdx ) = false;
      fSamplingNEvents.at( treeIdx ) = 0;
      fSamplingWeight.at( treeIdx ) = 1.0;
      return;
   }

   // for the initialization, the sampling has to be turned off, afterwards we will turn it on
   fSampling.at( treeIdx ) = false;

   fSamplingNEvents.at( treeIdx ) = Int_t(fraction*GetNEvents());
   fSamplingWeight.at( treeIdx ) = weight;

   Long64_t nEvts = GetNEvents();
   fSamplingEventList.at( treeIdx ).reserve( nEvts );
   fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
   for (Long64_t ievt=0; ievt<nEvts; ievt++) {
      std::pair<Float_t,Long64_t> p(1.0,ievt);
      fSamplingEventList.at( treeIdx ).push_back( p );
   }

   // now turn sampling on
   fSampling.at( treeIdx ) = true;
}


////////////////////////////////////////////////////////////////////////////////
/// create an event sampling (random or importance sampling)

void TMVA::DataSet::CreateSampling() const
{
   Int_t treeIdx = TreeIndex( GetCurrentType() );

   if (!fSampling.at(treeIdx) ) return;

   if (fSamplingRandom == 0 )
      Log() << kFATAL<< Form("Dataset[%s] : ",fdsi->GetName())
            << "no random generator present for creating a random/importance sampling (initialized?)" << Endl;

   // delete the previous selection
   fSamplingSelected.at(treeIdx).clear();

   // create a temporary event-list
   std::vector< std::pair< Float_t, Long64_t > > evtList;
   std::vector< std::pair< Float_t, Long64_t > >::iterator evtListIt;

   // some variables
   Float_t sumWeights = 0;

   // make a copy of the event-list
   evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );

   // sum up all the weights (internal weights for importance sampling)
   for (evtListIt = evtList.begin(); evtListIt != evtList.end(); ++evtListIt) {
      sumWeights += (*evtListIt).first;
   }
   evtListIt = evtList.begin();

   // random numbers
   std::vector< Float_t > rnds;
   rnds.reserve(fSamplingNEvents.at(treeIdx));

   Float_t pos = 0;
   for (Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
      pos = fSamplingRandom->Rndm()*sumWeights;
      rnds.push_back( pos );
   }

   // sort the random numbers
   std::sort(rnds.begin(),rnds.end());

   // select the events according to the random numbers
   std::vector< Float_t >::iterator rndsIt = rnds.begin();
   Float_t runningSum = 0.000000001;
   for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
      runningSum += (*evtListIt).first;
      if (runningSum >= (*rndsIt)) {
         fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
         evtListIt = evtList.erase( evtListIt );

         ++rndsIt;
         if (rndsIt == rnds.end() ) break;
      }
      else {
         ++evtListIt;
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// increase the importance sampling weight of the event
/// when not successful and decrease it when successful

void TMVA::DataSet::EventResult( Bool_t successful, Long64_t evtNumber )
{

   if (!fSampling.at(fCurrentTreeIdx)) return;
   if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999) return;

   Long64_t start = 0;
   Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
   if (evtNumber >= 0) {
      start = evtNumber;
      stop = evtNumber;
   }
   for ( Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
      if (Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
         Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "event number (" << iEvt
               << ") larger than number of sampled events ("
               << fSamplingEventList.at(fCurrentTreeIdx).size() << " of tree " << fCurrentTreeIdx << ")" << Endl;
         return;
      }
      Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first;
      if (!successful) {
         // weight /= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
         weight /= fSamplingWeight.at(fCurrentTreeIdx);
         if (weight > 1.0 ) weight = 1.0;
      }
      else {
         // weight *= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
         weight *= fSamplingWeight.at(fCurrentTreeIdx);
      }
      fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first = weight;
   }
}
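
// Illustrative sketch of the sampling workflow, tying the three functions above
// together (the fraction 0.3 and weight 0.8 are arbitrary values; how a given
// event is judged "successful" is up to the calling method):
//
//    ds.InitSampling(0.3, 0.8);     // sample ~30% of the events, re-weighting factor 0.8
//    ds.CreateSampling();           // draw a weighted subset for the current tree type
//    // ... train on the drawn subset; for an event index iEvt that was handled badly:
//    ds.EventResult(kFALSE, iEvt);  // increase its weight so it is drawn more often
//    ds.CreateSampling();           // the next drawing reflects the updated weights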

////////////////////////////////////////////////////////////////////////////////
/// create the test/trainings tree with all the variables, the weights, the
/// classes, the targets, the spectators, the MVA outputs

TTree* TMVA::DataSet::GetTree( Types::ETreeType type )
{
   Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "GetTree(" << ( type==Types::kTraining ? "training" : "testing" ) << ")" << Endl;

   // the dataset does not hold the tree, this function returns a new tree every time it is called

   if (type!=Types::kTraining && type!=Types::kTesting) return 0;

   Types::ETreeType savedType = GetCurrentType();

   SetCurrentType(type);
   const UInt_t t = TreeIndex(type);
   if (fResults.size() <= t) {
      Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "No results for treetype " << ( type==Types::kTraining ? "training" : "testing" )
            << " found. Size=" << fResults.size() << Endl;
   }

   // create the output tree
   TString treeName( (type == Types::kTraining ? "TrainTree" : "TestTree" ) );
   TTree *tree = new TTree(treeName,treeName);

   Float_t *varVals = new Float_t[fdsi->GetNVariables()];
   Float_t *tgtVals = new Float_t[fdsi->GetNTargets()];
   Float_t *visVals = new Float_t[fdsi->GetNSpectators()];

   UInt_t cls;
   Float_t weight;
   // TObjString *className = new TObjString();
   char className[40];


   //Float_t metVals[fResults.at(t).size()][Int_t(fdsi->GetNTargets()+1)];
   // replace by: [Joerg]
   Float_t **metVals = new Float_t*[fResults.at(t).size()];
   for(UInt_t i=0; i<fResults.at(t).size(); i++ )
      metVals[i] = new Float_t[fdsi->GetNTargets()+fdsi->GetNClasses()];

   // create branches for event-variables
   tree->Branch( "classID", &cls, "classID/I" );
   tree->Branch( "className", className, "className/C" );

   // create all branches for the variables
   Int_t n = 0;
   Int_t ivar_array = 0;
   Int_t arraySize = -1;
   for (std::vector<VariableInfo>::const_iterator itVars = fdsi->GetVariableInfos().begin();
        itVars != fdsi->GetVariableInfos().end(); ++itVars) {

      // has to be changed to take care of types different than float: TODO
      if (!itVars->TestBit(DataSetInfo::kIsArrayVariable) )
         tree->Branch( (*itVars).GetInternalName(), &varVals[n], (*itVars).GetInternalName()+TString("/F") );
      else {
         // variable is an array
         if (ivar_array == 0) {
            TString name = (*itVars).GetInternalName();
            name.ReplaceAll("[0]", "");
            arraySize = fdsi->GetVarArraySize((*itVars).GetExpression());
            tree->Branch(name, &varVals[n], name + TString::Format("[%d]/F", arraySize));
            Log() << kDEBUG << "creating branch for array " << name << " with size " << arraySize << Endl;
         }
         ivar_array++;
         if (ivar_array == arraySize)
            ivar_array = 0;
      }
      n++;
   }
   // create the branches for the targets
   n = 0;
   for (std::vector<VariableInfo>::const_iterator itTgts = fdsi->GetTargetInfos().begin();
        itTgts != fdsi->GetTargetInfos().end(); ++itTgts) {
      // has to be changed to take care of types different than float: TODO
      tree->Branch( (*itTgts).GetInternalName(), &tgtVals[n], (*itTgts).GetInternalName()+TString("/F") );
      n++;
   }
   // create the branches for the spectator variables
   n = 0;
   for (std::vector<VariableInfo>::const_iterator itVis = fdsi->GetSpectatorInfos().begin();
        itVis != fdsi->GetSpectatorInfos().end(); ++itVis) {
      // has to be changed to take care of types different than float: TODO
      tree->Branch( (*itVis).GetInternalName(), &visVals[n], (*itVis).GetInternalName()+TString("/F") );
      n++;
   }

   tree->Branch( "weight", &weight, "weight/F" );

   // create all the branches for the results
   n = 0;
   for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
        itMethod != fResults.at(t).end(); ++itMethod) {


      Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "analysis type: " << (itMethod->second->GetAnalysisType()==Types::kRegression ? "Regression" :
            (itMethod->second->GetAnalysisType()==Types::kMulticlass ? "Multiclass" : "Classification" )) << Endl;

      if (itMethod->second->GetAnalysisType() == Types::kClassification) {
         // classification
         tree->Branch( itMethod->first, &(metVals[n][0]), itMethod->first + "/F" );
      }
      else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
         // multiclass classification
         TString leafList("");
         for (UInt_t iCls = 0; iCls < fdsi->GetNClasses(); iCls++) {
            if (iCls > 0) leafList.Append( ":" );
            leafList.Append( fdsi->GetClassInfo( iCls )->GetName() );
            leafList.Append( "/F" );
         }
         Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
               << leafList << " itMethod->second " << itMethod->second << Endl;
         tree->Branch( itMethod->first, (metVals[n]), leafList );
      }
      else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
         // regression
         TString leafList("");
         for (UInt_t iTgt = 0; iTgt < fdsi->GetNTargets(); iTgt++) {
            if (iTgt > 0) leafList.Append( ":" );
            leafList.Append( fdsi->GetTargetInfo( iTgt ).GetInternalName() );
            // leafList.Append( fdsi->GetTargetInfo( iTgt ).GetLabel() );
            leafList.Append( "/F" );
         }
         Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
               << leafList << " itMethod->second " << itMethod->second << Endl;
         tree->Branch( itMethod->first, (metVals[n]), leafList );
      }
      else {
         Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "Unknown analysis type for result found when writing TestTree." << Endl;
      }
      n++;

   }

   // Sanity check, ensure all result sets have the expected number of events
   for (auto && itMethod : fResults.at(t)) {
      auto numEvents = GetNEvents(type);
      auto results = itMethod.second;
      auto resultsName = itMethod.first;

      Long64_t numEventsResults = 0;
      auto analysisType = results->GetAnalysisType();
      if (analysisType == Types::kClassification) {
         numEventsResults = dynamic_cast<ResultsClassification *>(results)->GetSize();
      } else if (analysisType == Types::kMulticlass) {
         numEventsResults = dynamic_cast<ResultsMulticlass *>(results)->GetSize();
      } else if (analysisType == Types::kRegression) {
         numEventsResults = dynamic_cast<ResultsRegression *>(results)->GetSize();
      } else {
         Log() << kFATAL << "Unexpected analysisType." << Endl;
      }

      if (numEventsResults != numEvents) {
         Log() << kFATAL << "An error occurred in DataSet::GetTree. "
                  "Inconsistent size of result for result with name '"
               << resultsName << "'."
               << " Size is '" << std::to_string(numEventsResults)
               << "'.'"
               << " Expected '" << numEvents << "'." << Endl;
      }
   }

   // loop through all the events
   for (Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
      // write the event-variables
      const Event* ev = GetEvent( iEvt );
      // write the classnumber and the classname
      cls = ev->GetClass();
      weight = ev->GetWeight();
      strlcpy(className, fdsi->GetClassInfo( cls )->GetName(), sizeof(className));

      // write the variables, targets and spectator variables
      for (UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++) varVals[ivar] = ev->GetValue( ivar );
      for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) tgtVals[itgt] = ev->GetTarget( itgt );
      for (UInt_t ivis = 0; ivis < ev->GetNSpectators(); ivis++) visVals[ivis] = ev->GetSpectator( ivis );


      // loop through all the results and write the branches
      auto iMethod = 0;
      for (auto && itMethod : fResults.at(t)) {
         auto & results = *itMethod.second;
         auto analysisType = results.GetAnalysisType();

         auto const & vals = results[iEvt];

         if (analysisType == Types::kClassification) {
            metVals[iMethod][0] = vals[0];
         } else if (analysisType == Types::kMulticlass) {
            for (UInt_t nCls = 0; nCls < fdsi->GetNClasses(); nCls++) {
               Float_t val = vals.at(nCls);
               metVals[iMethod][nCls] = val;
            }
         } else if (analysisType == Types::kRegression) {
            for (UInt_t nTgts = 0; nTgts < fdsi->GetNTargets(); nTgts++) {
               Float_t val = vals.at(nTgts);
               metVals[iMethod][nTgts] = val;
            }
         }
         ++iMethod;
      }
      // fill the variables into the tree
      tree->Fill();
   }

   Log() << kHEADER //<< Form("[%s] : ",fdsi.GetName())
         << "Created tree '" << tree->GetName() << "' with " << tree->GetEntries() << " events" << Endl << Endl;

   SetCurrentType(savedType);

   delete[] varVals;
   delete[] tgtVals;
   delete[] visVals;

   for(UInt_t i=0; i<fResults.at(t).size(); i++ )
      delete[] metVals[i];
   delete[] metVals;

   return tree;
}
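
// Illustrative sketch of how the returned tree is typically used (the output
// file name is arbitrary; the caller owns the tree and is responsible for
// deleting it):
//
//    TFile out("TMVA_DataSet.root", "RECREATE");
//    TTree* testTree = ds.GetTree(TMVA::Types::kTesting);
//    testTree->Write();   // persists variables, weights, classes and MVA outputs
//    delete testTree;
//    out.Close();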