Logo ROOT  
Reference Guide
DataSet.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : DataSet *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17 * *
18 * Copyright (c) 2006: *
19 * CERN, Switzerland *
20 * MPI-K Heidelberg, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27/*! \class TMVA::DataSet
28\ingroup TMVA
29
30Class that contains all the data information
31
32*/
33
34#include <vector>
35#include <algorithm>
36#include <cstdlib>
37#include <stdexcept>
38
39#include "TMVA/DataSetInfo.h"
40#include "TMVA/DataSet.h"
41#include "TMVA/Event.h"
42#include "TMVA/MsgLogger.h"
46#include "TMVA/Configurable.h"
47
48#include "TMVA/Types.h"
49#include "TMVA/Results.h"
50#include "TMVA/VariableInfo.h"
51
52#include "TRandom3.h"
53
54////////////////////////////////////////////////////////////////////////////////
55/// constructor
56
58 :TNamed(dsi.GetName(),"DataSet"),
59 fdsi(&dsi),
60 fEventCollection(4),
61 fCurrentTreeIdx(0),
62 fCurrentEventIdx(0),
63 fHasNegativeEventWeights(kFALSE),
64 fLogger( new MsgLogger(TString(TString("Dataset:")+dsi.GetName()).Data()) ),
65 fTrainingBlockSize(0)
66{
67
68 fClassEvents.resize(4);
69 fBlockBelongToTraining.reserve(10);
71
72 // sampling
74
75 Int_t treeNum = 2;
76 fSampling.resize( treeNum );
77 fSamplingNEvents.resize( treeNum );
78 fSamplingWeight.resize(treeNum);
79
80 for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
81 fSampling.at(treeIdx) = kFALSE;
82 fSamplingNEvents.at(treeIdx) = 0;
83 fSamplingWeight.at(treeIdx) = 1.0;
84 }
85}
86
87////////////////////////////////////////////////////////////////////////////////
88/// constructor
89
91 :fdsi(new DataSetInfo(GetName())),
92 fEventCollection(4),
93 fCurrentTreeIdx(0),
94 fCurrentEventIdx(0),
95 fHasNegativeEventWeights(kFALSE),
96 fLogger( new MsgLogger(TString(TString("Dataset:")+GetName()).Data()) ),
97 fTrainingBlockSize(0)
98{
99
100 fClassEvents.resize(4);
101 fBlockBelongToTraining.reserve(10);
102 fBlockBelongToTraining.push_back(kTRUE);
103
104 // sampling
105 fSamplingRandom = 0;
106
107 Int_t treeNum = 2;
108 fSampling.resize( treeNum );
109 fSamplingNEvents.resize( treeNum );
110 fSamplingWeight.resize(treeNum);
111
112 for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
113 fSampling.at(treeIdx) = kFALSE;
114 fSamplingNEvents.at(treeIdx) = 0;
115 fSamplingWeight.at(treeIdx) = 1.0;
116 }
117}
118
119////////////////////////////////////////////////////////////////////////////////
120/// destructor
121
123{
124 // delete event collection
125 Bool_t deleteEvents=true; // dataset owns the events /JS
126 DestroyCollection( Types::kTraining, deleteEvents );
127 DestroyCollection( Types::kTesting, deleteEvents );
128
129 fBlockBelongToTraining.clear();
130 // delete results
131 for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); ++it) {
132 for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); ++itMap) {
133 delete itMap->second;
134 }
135 }
136
137 // delete sampling
138 if (fSamplingRandom != 0 ) delete fSamplingRandom;
139
140
141 // need also to delete fEventCollections[2] and [3], not sure if they are used
142 DestroyCollection( Types::kValidation, deleteEvents );
143 DestroyCollection( Types::kTrainingOriginal, deleteEvents );
144
145 delete fLogger;
146}
147
148////////////////////////////////////////////////////////////////////////////////
149
151{
152 if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
153 if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
154 fClassEvents.at( type ).at( classNumber ) += 1;
155}
156
157////////////////////////////////////////////////////////////////////////////////
158
160{
161 if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
162 fClassEvents.at( type ).clear();
163}
164
165////////////////////////////////////////////////////////////////////////////////
166
168{
169 try {
170 return fClassEvents.at(type).at(classNumber);
171 }
172 catch (std::out_of_range &) {
173 ClassInfo* ci = fdsi->GetClassInfo( classNumber );
174 Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "No " << (type==0?"training":(type==1?"testing":"_unknown_type_"))
175 << " events for class " << (ci==NULL?"_no_name_known_":ci->GetName()) << " (index # "<<classNumber<<")"
176 << " available. Check if all class names are spelled correctly and if events are"
177 << " passing the selection cuts." << Endl;
178 }
179 catch (...) {
180 Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" << Endl;
181 }
182 return 0;
183}
184
185////////////////////////////////////////////////////////////////////////////////
186/// destroys the event collection (events + vector)
187
189{
190 UInt_t i = TreeIndex(type);
191 if (i>=fEventCollection.size() || fEventCollection[i].size()==0) return;
192 if (deleteEvents) {
193
194 for (UInt_t j=0; j<fEventCollection[i].size(); j++) delete fEventCollection[i][j];
195 }
196 fEventCollection[i].clear();
197}
198
199////////////////////////////////////////////////////////////////////////////////
200
202{
203 if (fSampling.size() > UInt_t(fCurrentTreeIdx) && fSampling.at(fCurrentTreeIdx)) {
204 Long64_t iEvt = fSamplingSelected.at(fCurrentTreeIdx).at( fCurrentEventIdx ).second;
205 return ((fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
206 }
207 else {
208 return ((fEventCollection.at(fCurrentTreeIdx))).at(fCurrentEventIdx);
209 }
210}
211
212////////////////////////////////////////////////////////////////////////////////
213/// access the number of variables through the datasetinfo
214
216{
217 return fdsi->GetNVariables();
218}
219
220////////////////////////////////////////////////////////////////////////////////
221/// access the number of targets through the datasetinfo
222
224{
225 return fdsi->GetNTargets();
226}
227
228////////////////////////////////////////////////////////////////////////////////
229/// access the number of targets through the datasetinfo
230
232{
233 return fdsi->GetNSpectators();
234}
235
236////////////////////////////////////////////////////////////////////////////////
237/// add event to event list
238/// after which the event is owned by the dataset
239
241{
242 fEventCollection.at(Int_t(type)).push_back(ev);
243 if (ev->GetWeight()<0) fHasNegativeEventWeights = kTRUE;
244}
245
246////////////////////////////////////////////////////////////////////////////////
247/// Sets the event collection (by DataSetFactory)
248
249void TMVA::DataSet::SetEventCollection(std::vector<TMVA::Event*>* events, Types::ETreeType type, Bool_t deleteEvents)
250{
251 DestroyCollection(type,deleteEvents);
252
253 const Int_t t = TreeIndex(type);
254 ClearNClassEvents( type );
255 //pointer to std::vector is not serializable,
256 fEventCollection.at(t) = *events;
257 for (std::vector<Event*>::iterator it = fEventCollection.at(t).begin(); it < fEventCollection.at(t).end(); ++it) {
258 IncrementNClassEvents( t, (*it)->GetClass() );
259 }
260}
261
262////////////////////////////////////////////////////////////////////////////////
263
266 Types::EAnalysisType analysistype )
267{
268 UInt_t t = TreeIndex(type);
269 if (t<fResults.size()) {
270 const std::map< TString, Results* >& resultsForType = fResults[t];
271 std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
272 if (it!=resultsForType.end()) {
273 //Log() << kINFO << " GetResults("<<info<<") returns existing result." << Endl;
274 return it->second;
275 }
276 }
277 else {
278 fResults.resize(t+1);
279 }
280
281 // nothing found
282
283 Results * newresults = 0;
284 switch(analysistype) {
286 newresults = new ResultsClassification(fdsi,resultsName);
287 break;
289 newresults = new ResultsRegression(fdsi,resultsName);
290 break;
292 newresults = new ResultsMulticlass(fdsi,resultsName);
293 break;
295 newresults = new ResultsClassification(fdsi,resultsName);
296 break;
298 //Log() << kINFO << " GetResults("<<info<<") can't create new one." << Endl;
299 return 0;
300 break;
301 }
302
303 newresults->SetTreeType( type );
304 fResults[t][resultsName] = newresults;
305
306 //Log() << kINFO << " GetResults("<<info<<") builds new result." << Endl;
307 return newresults;
308}
309////////////////////////////////////////////////////////////////////////////////
310/// delete the results stored for this particular Method instance.
311/// (here apparently called resultsName instead of MethodTitle
312/// Tree type (Training, testing etc..)
313/// Analysis Type (Classification, Multiclass, Regression etc..)
314
315void TMVA::DataSet::DeleteResults( const TString & resultsName,
317 Types::EAnalysisType /* analysistype */ )
318{
319 if (fResults.empty()) return;
320
321 if (UInt_t(type) > fResults.size()){
322 Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for an Treetype (training/testing/...)"
323 << " whose index " << type << " does not exist " << Endl;
324 }
325 std::map< TString, Results* >& resultsForType = fResults[UInt_t(type)];
326 std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
327 if (it!=resultsForType.end()) {
328 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << " Delete Results previous existing result:" << resultsName
329 << " of type " << type << Endl;
330 delete it->second;
331 resultsForType.erase(it->first);
332 }
333 else {
334 Log() << kINFO << Form("Dataset[%s] : ",fdsi->GetName()) << "could not fine Result class of " << resultsName
335 << " of type " << type << " which I should have deleted" << Endl;
336 }
337}
338
339////////////////////////////////////////////////////////////////////////////////
340/// Deletes all results currently in the dataset.
341///
343 Types::EAnalysisType /* analysistype */ )
344{
345 if (fResults.empty()) return;
346
347 if (UInt_t(type) > fResults.size()){
348 Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for an Treetype (training/testing/...)"
349 << " whose index " << type << " does not exist " << Endl;
350 }
351
352 std::map<TString, Results *> & resultsForType = fResults[UInt_t(type)];
353
354 for (auto && it : resultsForType) {
355 auto & resultsName = it.first;
356
357 Log() << kDEBUG << Form("Dataset[%s] : ", fdsi->GetName())
358 << " DeleteAllResults previous existing result: "
359 << resultsName << " of type " << type << Endl;
360
361 delete it.second;
362 }
363
364 resultsForType.clear();
365}
366
367////////////////////////////////////////////////////////////////////////////////
368/// divide training set
369
371{
372 Int_t tOrg = TreeIndex(Types::kTrainingOriginal),tTrn = TreeIndex(Types::kTraining);
373 // not changing anything ??
374 if (fBlockBelongToTraining.size() == blockNum) return;
375 // storing the original training vector
376 if (fBlockBelongToTraining.size() == 1) {
377 if (fEventCollection[tOrg].size() == 0)
378 fEventCollection[tOrg].resize(fEventCollection[tTrn].size());
379 fEventCollection[tOrg].clear();
380 for (UInt_t i=0; i<fEventCollection[tTrn].size(); i++)
381 fEventCollection[tOrg].push_back(fEventCollection[tTrn][i]);
382 fClassEvents[tOrg] = fClassEvents[tTrn];
383 }
384 //reseting the event division vector
385 fBlockBelongToTraining.clear();
386 for (UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(kTRUE);
387
388 ApplyTrainingSetDivision();
389}
390
391////////////////////////////////////////////////////////////////////////////////
392/// apply division of data set
393
395{
396 Int_t tOrg = TreeIndex(Types::kTrainingOriginal), tTrn = TreeIndex(Types::kTraining), tVld = TreeIndex(Types::kValidation);
397 fEventCollection[tTrn].clear();
398 if (fEventCollection[tVld].size()==0)
399 fEventCollection[tVld].resize(fEventCollection[tOrg].size());
400 fEventCollection[tVld].clear();
401
402 //creating the new events collections, notice that the events that can't be evenly divided belong to the last event
403 for (UInt_t i=0; i<fEventCollection[tOrg].size(); i++) {
404 if (fBlockBelongToTraining[i % fBlockBelongToTraining.size()])
405 fEventCollection[tTrn].push_back(fEventCollection[tOrg][i]);
406 else
407 fEventCollection[tVld].push_back(fEventCollection[tOrg][i]);
408 }
409}
410
411////////////////////////////////////////////////////////////////////////////////
412/// move training block
413
415{
417 fBlockBelongToTraining[blockInd]=kFALSE;
418 else
419 fBlockBelongToTraining[blockInd]=kTRUE;
420 if (applyChanges) ApplyTrainingSetDivision();
421}
422
423////////////////////////////////////////////////////////////////////////////////
424/// return number of signal test events in dataset
425
427{
428 return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Signal")->GetNumber() );
429}
430
431////////////////////////////////////////////////////////////////////////////////
432/// return number of background test events in dataset
433
435{
436 return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Background")->GetNumber() );
437}
438
439////////////////////////////////////////////////////////////////////////////////
440/// return number of signal training events in dataset
441
443{
444 return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Signal")->GetNumber() );
445}
446
447////////////////////////////////////////////////////////////////////////////////
448/// return number of background training events in dataset
449
451{
452 return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Background")->GetNumber() );
453}
454
455////////////////////////////////////////////////////////////////////////////////
456/// initialize random or importance sampling
457
458void TMVA::DataSet::InitSampling( Float_t fraction, Float_t weight, UInt_t seed )
459{
460 // add a random generator if not yet present
461 if (fSamplingRandom == 0 ) fSamplingRandom = new TRandom3( seed );
462
463 // first, clear the lists
464 std::vector< std::pair< Float_t, Long64_t >* > evtList;
465
466 Int_t treeIdx = TreeIndex( GetCurrentType() );
467
468 if (fSamplingEventList.size() < UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
469 if (fSamplingSelected.size() < UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
470
471 fSamplingEventList.at(treeIdx).clear();
472 fSamplingSelected.at(treeIdx).clear();
473
474 if (fSampling.size() < UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
475 if (fSamplingNEvents.size() < UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
476 if (fSamplingWeight.size() < UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
477
478 if (fraction > 0.999999 || fraction < 0.0000001) {
479 fSampling.at( treeIdx ) = false;
480 fSamplingNEvents.at( treeIdx ) = 0;
481 fSamplingWeight.at( treeIdx ) = 1.0;
482 return;
483 }
484
485 // for the initialization, the sampling has to be turned off, afterwards we will turn it on
486 fSampling.at( treeIdx ) = false;
487
488 fSamplingNEvents.at( treeIdx ) = Int_t(fraction*GetNEvents());
489 fSamplingWeight.at( treeIdx ) = weight;
490
491 Long64_t nEvts = GetNEvents();
492 fSamplingEventList.at( treeIdx ).reserve( nEvts );
493 fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
494 for (Long64_t ievt=0; ievt<nEvts; ievt++) {
495 std::pair<Float_t,Long64_t> p(1.0,ievt);
496 fSamplingEventList.at( treeIdx ).push_back( p );
497 }
498
499 // now turn sampling on
500 fSampling.at( treeIdx ) = true;
501}
502
503
504////////////////////////////////////////////////////////////////////////////////
505/// create an event sampling (random or importance sampling)
506
508{
509 Int_t treeIdx = TreeIndex( GetCurrentType() );
510
511 if (!fSampling.at(treeIdx) ) return;
512
513 if (fSamplingRandom == 0 )
514 Log() << kFATAL<< Form("Dataset[%s] : ",fdsi->GetName())
515 << "no random generator present for creating a random/importance sampling (initialized?)" << Endl;
516
517 // delete the previous selection
518 fSamplingSelected.at(treeIdx).clear();
519
520 // create a temporary event-list
521 std::vector< std::pair< Float_t, Long64_t > > evtList;
522 std::vector< std::pair< Float_t, Long64_t > >::iterator evtListIt;
523
524 // some variables
525 Float_t sumWeights = 0;
526
527 // make a copy of the event-list
528 evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
529
530 // sum up all the weights (internal weights for importance sampling)
531 for (evtListIt = evtList.begin(); evtListIt != evtList.end(); ++evtListIt) {
532 sumWeights += (*evtListIt).first;
533 }
534 evtListIt = evtList.begin();
535
536 // random numbers
537 std::vector< Float_t > rnds;
538 rnds.reserve(fSamplingNEvents.at(treeIdx));
539
540 Float_t pos = 0;
541 for (Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
542 pos = fSamplingRandom->Rndm()*sumWeights;
543 rnds.push_back( pos );
544 }
545
546 // sort the random numbers
547 std::sort(rnds.begin(),rnds.end());
548
549 // select the events according to the random numbers
550 std::vector< Float_t >::iterator rndsIt = rnds.begin();
551 Float_t runningSum = 0.000000001;
552 for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
553 runningSum += (*evtListIt).first;
554 if (runningSum >= (*rndsIt)) {
555 fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
556 evtListIt = evtList.erase( evtListIt );
557
558 ++rndsIt;
559 if (rndsIt == rnds.end() ) break;
560 }
561 else {
562 ++evtListIt;
563 }
564 }
565}
566
567////////////////////////////////////////////////////////////////////////////////
568/// increase the importance sampling weight of the event
569/// when not successful and decrease it when successful
570
571void TMVA::DataSet::EventResult( Bool_t successful, Long64_t evtNumber )
572{
573
574 if (!fSampling.at(fCurrentTreeIdx)) return;
575 if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999) return;
576
577 Long64_t start = 0;
578 Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
579 if (evtNumber >= 0) {
580 start = evtNumber;
581 stop = evtNumber;
582 }
583 for ( Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
584 if (Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
585 Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "event number (" << iEvt
586 << ") larger than number of sampled events ("
587 << fSamplingEventList.at(fCurrentTreeIdx).size() << " of tree " << fCurrentTreeIdx << ")" << Endl;
588 return;
589 }
590 Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first;
591 if (!successful) {
592 // weight /= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
593 weight /= fSamplingWeight.at(fCurrentTreeIdx);
594 if (weight > 1.0 ) weight = 1.0;
595 }
596 else {
597 // weight *= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
598 weight *= fSamplingWeight.at(fCurrentTreeIdx);
599 }
600 fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first = weight;
601 }
602}
603
604////////////////////////////////////////////////////////////////////////////////
605/// create the test/trainings tree with all the variables, the weights, the
606/// classes, the targets, the spectators, the MVA outputs
607
609{
610 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "GetTree(" << ( type==Types::kTraining ? "training" : "testing" ) << ")" << Endl;
611
612 // the dataset does not hold the tree, this function returns a new tree every time it is called
613
614 if (type!=Types::kTraining && type!=Types::kTesting) return 0;
615
616 Types::ETreeType savedType = GetCurrentType();
617
618 SetCurrentType(type);
619 const UInt_t t = TreeIndex(type);
620 if (fResults.size() <= t) {
621 Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "No results for treetype " << ( type==Types::kTraining ? "training" : "testing" )
622 << " found. Size=" << fResults.size() << Endl;
623 }
624
625 // return number of background training events in dataset
626 TString treeName( (type == Types::kTraining ? "TrainTree" : "TestTree" ) );
627 TTree *tree = new TTree(treeName,treeName);
628
629 Float_t *varVals = new Float_t[fdsi->GetNVariables()];
630 Float_t *tgtVals = new Float_t[fdsi->GetNTargets()];
631 Float_t *visVals = new Float_t[fdsi->GetNSpectators()];
632
633 UInt_t cls;
634 Float_t weight;
635 // TObjString *className = new TObjString();
636 char className[40];
637
638
639 //Float_t metVals[fResults.at(t).size()][Int_t(fdsi->GetNTargets()+1)];
640 // replace by: [Joerg]
641 Float_t **metVals = new Float_t*[fResults.at(t).size()];
642 for(UInt_t i=0; i<fResults.at(t).size(); i++ )
643 metVals[i] = new Float_t[fdsi->GetNTargets()+fdsi->GetNClasses()];
644
645 // create branches for event-variables
646 tree->Branch( "classID", &cls, "classID/I" );
647 tree->Branch( "className", className, "className/C" );
648
649 // create all branches for the variables
650 Int_t n = 0;
651 Int_t ivar_array = 0;
652 Int_t arraySize = -1;
653 for (std::vector<VariableInfo>::const_iterator itVars = fdsi->GetVariableInfos().begin();
654 itVars != fdsi->GetVariableInfos().end(); ++itVars) {
655
656 // has to be changed to take care of types different than float: TODO
657 if (!itVars->TestBit(DataSetInfo::kIsArrayVariable) )
658 tree->Branch( (*itVars).GetInternalName(), &varVals[n], (*itVars).GetInternalName()+TString("/F") );
659 else {
660 // variable is an array
661 if (ivar_array == 0) {
662 TString name = (*itVars).GetInternalName();
663 name.ReplaceAll("[0]", "");
664 arraySize = fdsi->GetVarArraySize((*itVars).GetExpression());
665 tree->Branch(name, &varVals[n], name + TString::Format("[%d]/F", arraySize));
666 Log() << kDEBUG << "creating branch for array " << name << " with size " << arraySize << Endl;
667 }
668 ivar_array++;
669 if (ivar_array == arraySize)
670 ivar_array = 0;
671 }
672 n++;
673 }
674 // create the branches for the targets
675 n = 0;
676 for (std::vector<VariableInfo>::const_iterator itTgts = fdsi->GetTargetInfos().begin();
677 itTgts != fdsi->GetTargetInfos().end(); ++itTgts) {
678 // has to be changed to take care of types different than float: TODO
679 tree->Branch( (*itTgts).GetInternalName(), &tgtVals[n], (*itTgts).GetInternalName()+TString("/F") );
680 n++;
681 }
682 // create the branches for the spectator variables
683 n = 0;
684 for (std::vector<VariableInfo>::const_iterator itVis = fdsi->GetSpectatorInfos().begin();
685 itVis != fdsi->GetSpectatorInfos().end(); ++itVis) {
686 // has to be changed to take care of types different than float: TODO
687 tree->Branch( (*itVis).GetInternalName(), &visVals[n], (*itVis).GetInternalName()+TString("/F") );
688 n++;
689 }
690
691 tree->Branch( "weight", &weight, "weight/F" );
692
693 // create all the branches for the results
694 n = 0;
695 for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
696 itMethod != fResults.at(t).end(); ++itMethod) {
697
698
699 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "analysis type: " << (itMethod->second->GetAnalysisType()==Types::kRegression ? "Regression" :
700 (itMethod->second->GetAnalysisType()==Types::kMulticlass ? "Multiclass" : "Classification" )) << Endl;
701
702 if (itMethod->second->GetAnalysisType() == Types::kClassification) {
703 // classification
704 tree->Branch( itMethod->first, &(metVals[n][0]), itMethod->first + "/F" );
705 }
706 else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
707 // multiclass classification
708 TString leafList("");
709 for (UInt_t iCls = 0; iCls < fdsi->GetNClasses(); iCls++) {
710 if (iCls > 0) leafList.Append( ":" );
711 leafList.Append( fdsi->GetClassInfo( iCls )->GetName() );
712 leafList.Append( "/F" );
713 }
714 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
715 << leafList << " itMethod->second " << itMethod->second << Endl;
716 tree->Branch( itMethod->first, (metVals[n]), leafList );
717 }
718 else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
719 // regression
720 TString leafList("");
721 for (UInt_t iTgt = 0; iTgt < fdsi->GetNTargets(); iTgt++) {
722 if (iTgt > 0) leafList.Append( ":" );
723 leafList.Append( fdsi->GetTargetInfo( iTgt ).GetInternalName() );
724 // leafList.Append( fdsi->GetTargetInfo( iTgt ).GetLabel() );
725 leafList.Append( "/F" );
726 }
727 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
728 << leafList << " itMethod->second " << itMethod->second << Endl;
729 tree->Branch( itMethod->first, (metVals[n]), leafList );
730 }
731 else {
732 Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "Unknown analysis type for result found when writing TestTree." << Endl;
733 }
734 n++;
735
736 }
737
738 // Sanity check, ensure all result sets have the expected number of events
739 for (auto && itMethod : fResults.at(t)) {
740 auto numEvents = GetNEvents(type);
741 auto results = itMethod.second;
742 auto resultsName = itMethod.first;
743
744 Long64_t numEventsResults = 0;
745 auto analysisType = results->GetAnalysisType();
746 if (analysisType == Types::kClassification) {
747 numEventsResults = dynamic_cast<ResultsClassification *>(results)->GetSize();
748 } else if (analysisType == Types::kMulticlass) {
749 numEventsResults = dynamic_cast<ResultsMulticlass *>(results)->GetSize();
750 } else if (analysisType == Types::kRegression) {
751 numEventsResults = dynamic_cast<ResultsRegression *>(results)->GetSize();
752 } else {
753 Log() << kFATAL << "Unexpected analysisType." << Endl;
754 }
755
756 if (numEventsResults != numEvents) {
757 Log() << kFATAL << "An error occurred in DataSet::GetTree. "
758 "Inconsistent size of result for result with name '"
759 << resultsName << "'."
760 << " Size is '" << std::to_string(numEventsResults)
761 << "'.'"
762 << " Expected '" << numEvents << "'." << Endl;
763 }
764 }
765
766 // loop through all the events
767 for (Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
768 // write the event-variables
769 const Event* ev = GetEvent( iEvt );
770 // write the classnumber and the classname
771 cls = ev->GetClass();
772 weight = ev->GetWeight();
773 strlcpy(className, fdsi->GetClassInfo( cls )->GetName(), sizeof(className));
774
775 // write the variables, targets and spectator variables
776 for (UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++) varVals[ivar] = ev->GetValue( ivar );
777 for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) tgtVals[itgt] = ev->GetTarget( itgt );
778 for (UInt_t ivis = 0; ivis < ev->GetNSpectators(); ivis++) visVals[ivis] = ev->GetSpectator( ivis );
779
780
781 // loop through all the results and write the branches
782 auto iMethod = 0;
783 for (auto && itMethod : fResults.at(t)) {
784 auto & results = *itMethod.second;
785 auto analysisType = results.GetAnalysisType();
786
787 auto const & vals = results[iEvt];
788
789 if (analysisType == Types::kClassification) {
790 metVals[iMethod][0] = vals[0];
791 } else if (analysisType == Types::kMulticlass) {
792 for (UInt_t nCls = 0; nCls < fdsi->GetNClasses(); nCls++) {
793 Float_t val = vals.at(nCls);
794 metVals[iMethod][nCls] = val;
795 }
796 } else if (analysisType == Types::kRegression) {
797 for (UInt_t nTgts = 0; nTgts < fdsi->GetNTargets(); nTgts++) {
798 Float_t val = vals.at(nTgts);
799 metVals[iMethod][nTgts] = val;
800 }
801 }
802 ++iMethod;
803 }
804 // fill the variables into the tree
805 tree->Fill();
806 }
807
808 Log() << kHEADER //<< Form("[%s] : ",fdsi.GetName())
809 << "Created tree '" << tree->GetName() << "' with " << tree->GetEntries() << " events" << Endl << Endl;
810
811 SetCurrentType(savedType);
812
813 delete[] varVals;
814 delete[] tgtVals;
815 delete[] visVals;
816
817 for(UInt_t i=0; i<fResults.at(t).size(); i++ )
818 delete[] metVals[i];
819 delete[] metVals;
820
821 return tree;
822}
823
int Int_t
Definition: RtypesCore.h:43
unsigned int UInt_t
Definition: RtypesCore.h:44
const Bool_t kFALSE
Definition: RtypesCore.h:90
long long Long64_t
Definition: RtypesCore.h:71
float Float_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:89
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
Class that contains all the information of a class.
Definition: ClassInfo.h:49
Class that contains all the data information.
Definition: DataSetInfo.h:60
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:370
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:240
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:426
std::vector< Char_t > fSampling
Definition: DataSet.h:160
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:162
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:223
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:159
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:442
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:571
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:249
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets,...
Definition: DataSet.cxx:608
const Event * GetEvent() const
Definition: DataSet.cxx:201
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:264
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:167
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:176
UInt_t GetNSpectators() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:231
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:414
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:215
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:161
DataSet()
constructor
Definition: DataSet.cxx:90
virtual ~DataSet()
destructor
Definition: DataSet.cxx:122
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:169
void DeleteAllResults(Types::ETreeType type, Types::EAnalysisType analysistype)
Deletes all results currently in the dataset.
Definition: DataSet.cxx:342
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:458
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:150
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Definition: DataSet.cxx:315
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:507
TRandom3 * fSamplingRandom
Definition: DataSet.h:165
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:450
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:188
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:394
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:434
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:236
UInt_t GetNSpectators() const
accessor to the number of spectators
Definition: Event.cxx:327
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:308
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:319
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:381
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Definition: Event.cxx:261
UInt_t GetClass() const
Definition: Event.h:86
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Definition: Results.h:57
void SetTreeType(Types::ETreeType type)
Definition: Results.h:67
EAnalysisType
Definition: Types.h:127
@ kMulticlass
Definition: Types.h:130
@ kNoAnalysisType
Definition: Types.h:131
@ kClassification
Definition: Types.h:128
@ kMaxAnalysisType
Definition: Types.h:132
@ kRegression
Definition: Types.h:129
@ kTrainingOriginal
Definition: Types.h:148
@ kTraining
Definition: Types.h:144
@ kValidation
Definition: Types.h:147
@ kTesting
Definition: Types.h:145
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:29
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Random number generator class based on M.
Definition: TRandom3.h:27
Basic string class.
Definition: TString.h:131
TString & Append(const char *cs)
Definition: TString.h:559
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311
A TTree represents a columnar dataset.
Definition: TTree.h:78
const Int_t n
Definition: legend1.C:16
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:750
Definition: tree.py:1
#define dest(otri, vertexptr)
Definition: triangle.c:1040