DataSetFactory.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss

/*****************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
 * Package: TMVA                                                             *
 * Class  : DataSetFactory                                                   *
 *                                                                           *
 * Description:                                                              *
 *      Implementation (see header for description)                         *
 *                                                                           *
 * Authors (alphabetical):                                                   *
 *      Andreas Hoecker    <Andreas.Hocker@cern.ch>   - CERN, Switzerland    *
 *      Peter Speckmayer   <Peter.Speckmayer@cern.ch> - CERN, Switzerland    *
 *      Joerg Stelzer      <Joerg.Stelzer@cern.ch>    - MSU, USA             *
 *      Eckhard von Toerne <evt@physik.uni-bonn.de>   - U. of Bonn, Germany  *
 *      Helge Voss         <Helge.Voss@cern.ch>       - MPI-K Heidelberg, Germany *
 *                                                                           *
 * Copyright (c) 2009:                                                       *
 *      CERN, Switzerland                                                    *
 *      MPI-K Heidelberg, Germany                                            *
 *      U. of Bonn, Germany                                                  *
 *                                                                           *
 * Redistribution and use in source and binary forms, with or without       *
 * modification, are permitted according to the terms listed in LICENSE     *
 * (see tmva/doc/LICENSE)                                                    *
 *****************************************************************************/

/*! \class TMVA::DataSetFactory
\ingroup TMVA

Class that contains all the data information.

*/

#include <assert.h>

#include <map>
#include <vector>
#include <iomanip>
#include <iostream>

#include <algorithm>
#include <functional>
#include <numeric>
#include <random>

#include "TMVA/DataSetFactory.h"

#include "TEventList.h"
#include "TFile.h"
#include "TRandom3.h"
#include "TMatrixF.h"
#include "TVectorF.h"
#include "TMath.h"
#include "TTree.h"
#include "TBranch.h"

#include "TMVA/MsgLogger.h"
#include "TMVA/Configurable.h"
#include "TMVA/VariableIdentityTransform.h"
#include "TMVA/VariableDecorrTransform.h"
#include "TMVA/VariablePCATransform.h"
#include "TMVA/DataSet.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/DataInputHandler.h"
#include "TMVA/Event.h"

#include "TMVA/Tools.h"
#include "TMVA/Types.h"
#include "TMVA/VariableInfo.h"

using std::setiosflags, std::ios;

//TMVA::DataSetFactory* TMVA::DataSetFactory::fgInstance = 0;

namespace TMVA {
   // calculate the largest common divisor
   // this function is not happy if numbers are negative!
   Int_t LargestCommonDivider(Int_t a, Int_t b)
   {
      if (a<b) {Int_t tmp = a; a=b; b=tmp; } // achieve a>=b
      if (b==0) return a;
      Int_t fullFits = a/b;
      return LargestCommonDivider(b,a-b*fullFits);
   }
}
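
// Worked example (illustrative trace only; the helper above is file-local):
// LargestCommonDivider(24, 36) first swaps to (36, 24), then recurses with
// (24, 36 - 1*24) = (24, 12), then with (12, 24 - 2*12) = (12, 0), where the
// b==0 base case returns 12 -- the classic Euclidean algorithm.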


////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::DataSetFactory::DataSetFactory() :
   fVerbose(kFALSE),
   fVerboseLevel(TString("Info")),
   fScaleWithPreselEff(0),
   fCurrentTree(0),
   fCurrentEvtIdx(0),
   fInputFormulas(0),
   fLogger( new MsgLogger("DataSetFactory", kINFO) )
{
}

////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::DataSetFactory::~DataSetFactory()
{
   std::vector<TTreeFormula*>::const_iterator formIt;

   for (formIt = fInputFormulas.begin()    ; formIt != fInputFormulas.end()    ; ++formIt) if (*formIt) delete *formIt;
   for (formIt = fTargetFormulas.begin()   ; formIt != fTargetFormulas.end()   ; ++formIt) if (*formIt) delete *formIt;
   for (formIt = fCutFormulas.begin()      ; formIt != fCutFormulas.end()      ; ++formIt) if (*formIt) delete *formIt;
   for (formIt = fWeightFormula.begin()    ; formIt != fWeightFormula.end()    ; ++formIt) if (*formIt) delete *formIt;
   for (formIt = fSpectatorFormulas.begin(); formIt != fSpectatorFormulas.end(); ++formIt) if (*formIt) delete *formIt;

   delete fLogger;
}

////////////////////////////////////////////////////////////////////////////////
/// steering the creation of a new dataset

TMVA::DataSet* TMVA::DataSetFactory::CreateDataSet( TMVA::DataSetInfo& dsi,
                                                    TMVA::DataInputHandler& dataInput )
{
   // build the first dataset from the data input
   DataSet * ds = BuildInitialDataSet( dsi, dataInput );

   if (ds->GetNEvents() > 1 && fComputeCorrelations ) {
      CalcMinMax(ds,dsi);

      // from the final dataset build the correlation matrix
      for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) {
         const TString className = dsi.GetClassInfo(cl)->GetName();
         dsi.SetCorrelationMatrix( className, CalcCorrelationMatrix( ds, cl ) );
         if (fCorrelations) {
            dsi.PrintCorrelationMatrix(className);
         }
      }
      //Log() << kHEADER << Endl;
      Log() << kHEADER << Form("[%s] : ",dsi.GetName()) << " " << Endl << Endl;
   }

   return ds;
}
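
// Usage sketch (an assumption about the surrounding workflow, not code from
// this file): the factory is normally driven indirectly through a DataLoader,
// whose PrepareTrainingAndTestTree() call ends up here. sigTree and bkgTree
// stand for user-provided TTrees.
//
//    TMVA::DataLoader loader("dataset");
//    loader.AddVariable("var1", 'F');
//    loader.AddSignalTree(sigTree, 1.0);
//    loader.AddBackgroundTree(bkgTree, 1.0);
//    loader.PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=EqualNumEvents");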

////////////////////////////////////////////////////////////////////////////////

TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi )
{
   Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName()) << "Build DataSet consisting of one Event with dynamically changing variables" << Endl;
   DataSet* ds = new DataSet(dsi);

   // create a DataSet with one Event which uses dynamic variables
   // (pointers to variables)
   if(dsi.GetNClasses()==0){
      dsi.AddClass( "data" );
      dsi.GetClassInfo( "data" )->SetNumber(0);
   }

   std::vector<Float_t*>* evdyn = new std::vector<Float_t*>(0);

   std::vector<VariableInfo>& varinfos = dsi.GetVariableInfos();

   if (varinfos.empty())
      Log() << kFATAL << Form("Dataset[%s] : ",dsi.GetName()) << "Dynamic data set cannot be built, since no variable information is present. Apparently no variables have been set. This should not happen, please contact the TMVA authors." << Endl;

   std::vector<VariableInfo>::iterator it = varinfos.begin(), itEnd=varinfos.end();
   for (;it!=itEnd;++it) {
      Float_t* external=(Float_t*)(*it).GetExternalLink();
      if (external==0)
         Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName()) << "The link to the external variable is NULL while I am trying to build a dynamic data set. In this case fTmpEvent from MethodBase HAS TO BE USED in the method to get useful values in variables." << Endl;
      else evdyn->push_back (external);
   }

   std::vector<VariableInfo>& spectatorinfos = dsi.GetSpectatorInfos();
   std::vector<char> spectatorTypes;
   spectatorTypes.reserve(spectatorinfos.size());
   for (auto &&info: spectatorinfos) {
      evdyn->push_back( (Float_t*)info.GetExternalLink() );
      spectatorTypes.push_back(info.GetVarType());
   }

   TMVA::Event * ev = new Event((const std::vector<Float_t*>*&)evdyn, varinfos.size());
   ev->SetSpectatorTypes(spectatorTypes);
   std::vector<Event *> *newEventVector = new std::vector<Event *>;
   newEventVector->push_back(ev);

   ds->SetEventCollection(newEventVector, Types::kTraining);
   ds->SetCurrentType( Types::kTraining );
   ds->SetCurrentEvent( 0 );

   delete newEventVector;
   return ds;
}

////////////////////////////////////////////////////////////////////////////////
/// if no entries, then create a DataSet with one Event which uses
/// dynamic variables (pointers to variables)

TMVA::DataSet*
TMVA::DataSetFactory::BuildInitialDataSet( DataSetInfo& dsi,
                                           DataInputHandler& dataInput )
{
   if (dataInput.GetEntries()==0) return BuildDynamicDataSet( dsi );
   // -------------------------------------------------------------------------

   // register the classes in the datasetinfo-object
   // information comes from the trees in the dataInputHandler-object
   std::vector< TString >* classList = dataInput.GetClassList();
   for (std::vector<TString>::iterator it = classList->begin(); it < classList->end(); ++it) {
      dsi.AddClass( (*it) );
   }
   delete classList;

   EvtStatsPerClass eventCounts(dsi.GetNClasses());
   TString normMode;
   TString splitMode;
   TString mixMode;
   UInt_t  splitSeed;

   InitOptions( dsi, eventCounts, normMode, splitSeed, splitMode, mixMode );
   // ======= build event-vector from input, apply preselection ===============
   EventVectorOfClassesOfTreeType tmpEventVector;
   BuildEventVector( dsi, dataInput, tmpEventVector, eventCounts );

   DataSet* ds = MixEvents( dsi, tmpEventVector, eventCounts,
                            splitMode, mixMode, normMode, splitSeed );

   const Bool_t showCollectedOutput = kFALSE;
   if (showCollectedOutput) {
      Int_t maxL = dsi.GetClassNameMaxLength();
      Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "Collected:" << Endl;
      for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) {
         Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "    "
               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
               << " training entries: " << ds->GetNClassEvents( 0, cl ) << Endl;
         Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "    "
               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
               << " testing entries: " << ds->GetNClassEvents( 1, cl ) << Endl;
      }
      Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << " " << Endl;
   }

   return ds;
}

////////////////////////////////////////////////////////////////////////////////
/// checks a TTreeFormula for problems

Bool_t TMVA::DataSetFactory::CheckTTreeFormula( TTreeFormula* ttf,
                                                const TString& expression,
                                                Bool_t& hasDollar )
{
   Bool_t worked = kTRUE;

   if( ttf->GetNdim() <= 0 )
      Log() << kFATAL << "Expression " << expression.Data()
            << " could not be resolved to a valid formula. " << Endl;
   if( ttf->GetNdata() == 0 ){
      Log() << kWARNING << "Expression: " << expression.Data()
            << " does not provide data for this event. "
            << "This event is not taken into account. --> please check if you use as a variable "
            << "an entry of an array which is not filled for some events "
            << "(e.g. arr[4] when arr has only 3 elements)." << Endl;
      Log() << kWARNING << "If you want to take the event into account you can do something like: "
            << "\"Alt$(arr[4],0)\" where in cases where arr doesn't have a 4th element, "
            << "0 is taken as an alternative." << Endl;
      worked = kFALSE;
   }
   if( expression.Contains("$") )
      hasDollar = kTRUE;
   else {
      for (int i = 0, iEnd = ttf->GetNcodes(); i < iEnd; ++i) {
         TLeaf* leaf = ttf->GetLeaf(i);
         if (!leaf->IsOnTerminalBranch())
            hasDollar = kTRUE;
      }
   }
   return worked;
}
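
// Example of the Alt$ workaround suggested in the warning above (user-side
// code sketched for illustration; "loader" and "arr" are assumptions, not
// names from this file):
//
//    loader.AddVariable("Alt$(arr[4],0)", 'F');
//
// TTreeFormula then substitutes 0 whenever arr has fewer than 5 elements, so
// the event passes the GetNdata() check instead of being skipped.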


////////////////////////////////////////////////////////////////////////////////
/// While the data gets copied into the local training and testing
/// trees, the input tree can change (for instance when changing from
/// signal to background tree, or using TChains as input). The
/// TTreeFormulas that hold the input expressions need to be
/// re-associated with the new tree, which is done here.

void TMVA::DataSetFactory::ChangeToNewTree( TreeInfo& tinfo, const DataSetInfo & dsi )
{
   TTree *tr = tinfo.GetTree()->GetTree();

   //tr->SetBranchStatus("*",1); // not needed when using TTreeFormula
   tr->ResetBranchAddresses();

   Bool_t hasDollar = kTRUE; // set to false to enable only the branches that are actually needed

   // 1) the input variable formulas
   Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << " create input formulas for tree " << tr->GetName() << Endl;
   std::vector<TTreeFormula*>::const_iterator formIt, formItEnd;
   for (formIt = fInputFormulas.begin(), formItEnd=fInputFormulas.end(); formIt!=formItEnd; ++formIt) if (*formIt) delete *formIt;
   fInputFormulas.clear();
   TTreeFormula* ttf = 0;
   fInputTableFormulas.clear(); // this contains shallow pointer copies

   bool firstArrayVar = kTRUE;
   int firstArrayVarIndex = -1;
   int arraySize = -1;
   for (UInt_t i = 0; i < dsi.GetNVariables(); i++) {

      // create TTreeFormula
      if (! dsi.IsVariableFromArray(i) ) {
         ttf = new TTreeFormula(Form("Formula%s", dsi.GetVariableInfo(i).GetInternalName().Data()),
                                dsi.GetVariableInfo(i).GetExpression().Data(), tr);
         CheckTTreeFormula(ttf, dsi.GetVariableInfo(i).GetExpression(), hasDollar);
         fInputFormulas.emplace_back(ttf);
         fInputTableFormulas.emplace_back(std::make_pair(ttf, (Int_t) 0));
      } else {
         // it is a variable from an array
         if (firstArrayVar) {

            // create a new TTreeFormula for the full array expression
            ttf = new TTreeFormula(Form("Formula%s", dsi.GetVariableInfo(i).GetInternalName().Data()),
                                   dsi.GetVariableInfo(i).GetExpression().Data(), tr);
            CheckTTreeFormula(ttf, dsi.GetVariableInfo(i).GetExpression(), hasDollar);
            fInputFormulas.push_back(ttf);

            arraySize = dsi.GetVarArraySize(dsi.GetVariableInfo(i).GetExpression());
            firstArrayVar = kFALSE;
            firstArrayVarIndex = i;

            Log() << kINFO << "Using variable " << dsi.GetVariableInfo(i).GetInternalName() <<
               " from array expression " << dsi.GetVariableInfo(i).GetExpression() << " of size " << arraySize << Endl;
         }
         fInputTableFormulas.push_back(std::make_pair(ttf, (Int_t) i-firstArrayVarIndex));
         if (int(i)-firstArrayVarIndex == arraySize-1 ) {
            // this is the last element of the array
            firstArrayVar = kTRUE;
            firstArrayVarIndex = -1;
            Log() << kDEBUG << "Using last variable from array : " << dsi.GetVariableInfo(i).GetInternalName() << Endl;
         }
      }

   }

   //
   // targets
   //
   Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName()) << "transform regression targets" << Endl;
   for (formIt = fTargetFormulas.begin(), formItEnd = fTargetFormulas.end(); formIt!=formItEnd; ++formIt) if (*formIt) delete *formIt;
   fTargetFormulas.clear();
   for (UInt_t i=0; i<dsi.GetNTargets(); i++) {
      ttf = new TTreeFormula( TString::Format( "Formula%s", dsi.GetTargetInfo(i).GetInternalName().Data() ),
                              dsi.GetTargetInfo(i).GetExpression().Data(), tr );
      CheckTTreeFormula( ttf, dsi.GetTargetInfo(i).GetExpression(), hasDollar );
      fTargetFormulas.push_back( ttf );
   }

   //
   // spectators
   //
   Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName()) << "transform spectator variables" << Endl;
   for (formIt = fSpectatorFormulas.begin(), formItEnd = fSpectatorFormulas.end(); formIt!=formItEnd; ++formIt) if (*formIt) delete *formIt;
   fSpectatorFormulas.clear();
   for (UInt_t i=0; i<dsi.GetNSpectators(); i++) {
      ttf = new TTreeFormula( TString::Format( "Formula%s", dsi.GetSpectatorInfo(i).GetInternalName().Data() ),
                              dsi.GetSpectatorInfo(i).GetExpression().Data(), tr );
      CheckTTreeFormula( ttf, dsi.GetSpectatorInfo(i).GetExpression(), hasDollar );
      fSpectatorFormulas.push_back( ttf );
   }

   //
   // the cuts (one per class, if non-existent: formula pointer = 0)
   //
   Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName()) << "transform cuts" << Endl;
   for (formIt = fCutFormulas.begin(), formItEnd = fCutFormulas.end(); formIt!=formItEnd; ++formIt) if (*formIt) delete *formIt;
   fCutFormulas.clear();
   for (UInt_t clIdx=0; clIdx<dsi.GetNClasses(); clIdx++) {
      const TCut& tmpCut = dsi.GetClassInfo(clIdx)->GetCut();
      const TString tmpCutExp(tmpCut.GetTitle());
      ttf = 0;
      if (tmpCutExp!="") {
         ttf = new TTreeFormula( Form("CutClass%i",clIdx), tmpCutExp, tr );
         Bool_t worked = CheckTTreeFormula( ttf, tmpCutExp, hasDollar );
         if( !worked ){
            Log() << kWARNING << "Please check class \"" << dsi.GetClassInfo(clIdx)->GetName()
                  << "\" cut \"" << dsi.GetClassInfo(clIdx)->GetCut() << "\"" << Endl;
         }
      }
      fCutFormulas.push_back( ttf );
   }

   //
   // the weights (one per class, if non-existent: formula pointer = 0)
   //
   Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName()) << "transform weights" << Endl;
   for (formIt = fWeightFormula.begin(), formItEnd = fWeightFormula.end(); formIt!=formItEnd; ++formIt) if (*formIt) delete *formIt;
   fWeightFormula.clear();
   for (UInt_t clIdx=0; clIdx<dsi.GetNClasses(); clIdx++) {
      const TString tmpWeight = dsi.GetClassInfo(clIdx)->GetWeight();

      if (dsi.GetClassInfo(clIdx)->GetName() != tinfo.GetClassName() ) { // if the tree is of another class
         fWeightFormula.push_back( 0 );
         continue;
      }

      ttf = 0;
      if (tmpWeight!="") {
         ttf = new TTreeFormula( "FormulaWeight", tmpWeight, tr );
         Bool_t worked = CheckTTreeFormula( ttf, tmpWeight, hasDollar );
         if( !worked ){
            Log() << kWARNING << Form("Dataset[%s] : ",dsi.GetName()) << "Please check class \"" << dsi.GetClassInfo(clIdx)->GetName()
                  << "\" weight \"" << dsi.GetClassInfo(clIdx)->GetWeight() << "\"" << Endl;
         }
      }
      else {
         ttf = 0;
      }
      fWeightFormula.push_back( ttf );
   }
   return;
   // all the code below is not needed when using TTreeFormula

   Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "enable branches" << Endl;
   // now enable only branches that are needed in any input formula, target, cut, weight

   if (!hasDollar) {
      tr->SetBranchStatus("*",0);
      Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "enable branches: input variables" << Endl;
      // input vars
      for (formIt = fInputFormulas.begin(); formIt!=fInputFormulas.end(); ++formIt) {
         ttf = *formIt;
         for (Int_t bi = 0; bi<ttf->GetNcodes(); bi++) {
            tr->SetBranchStatus( ttf->GetLeaf(bi)->GetBranch()->GetName(), 1 );
         }
      }
      // targets
      Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "enable branches: targets" << Endl;
      for (formIt = fTargetFormulas.begin(); formIt!=fTargetFormulas.end(); ++formIt) {
         ttf = *formIt;
         for (Int_t bi = 0; bi<ttf->GetNcodes(); bi++)
            tr->SetBranchStatus( ttf->GetLeaf(bi)->GetBranch()->GetName(), 1 );
      }
      // spectators
      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName()) << "enable branches: spectators" << Endl;
      for (formIt = fSpectatorFormulas.begin(); formIt!=fSpectatorFormulas.end(); ++formIt) {
         ttf = *formIt;
         for (Int_t bi = 0; bi<ttf->GetNcodes(); bi++)
            tr->SetBranchStatus( ttf->GetLeaf(bi)->GetBranch()->GetName(), 1 );
      }
      // cuts
      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName()) << "enable branches: cuts" << Endl;
      for (formIt = fCutFormulas.begin(); formIt!=fCutFormulas.end(); ++formIt) {
         ttf = *formIt;
         if (!ttf) continue;
         for (Int_t bi = 0; bi<ttf->GetNcodes(); bi++)
            tr->SetBranchStatus( ttf->GetLeaf(bi)->GetBranch()->GetName(), 1 );
      }
      // weights
      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName()) << "enable branches: weights" << Endl;
      for (formIt = fWeightFormula.begin(); formIt!=fWeightFormula.end(); ++formIt) {
         ttf = *formIt;
         if (!ttf) continue;
         for (Int_t bi = 0; bi<ttf->GetNcodes(); bi++)
            tr->SetBranchStatus( ttf->GetLeaf(bi)->GetBranch()->GetName(), 1 );
      }
   }
   Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "tree initialized" << Endl;
   return;
}

////////////////////////////////////////////////////////////////////////////////
/// compute min and max values of all variables, targets and spectators

void TMVA::DataSetFactory::CalcMinMax( DataSet* ds, TMVA::DataSetInfo& dsi )
{
   const UInt_t nvar  = ds->GetNVariables();
   const UInt_t ntgts = ds->GetNTargets();
   const UInt_t nvis  = ds->GetNSpectators();

   Float_t *min   = new Float_t[nvar];
   Float_t *max   = new Float_t[nvar];
   Float_t *tgmin = new Float_t[ntgts];
   Float_t *tgmax = new Float_t[ntgts];
   Float_t *vmin  = new Float_t[nvis];
   Float_t *vmax  = new Float_t[nvis];

   for (UInt_t ivar=0; ivar<nvar ; ivar++) {   min[ivar] = FLT_MAX;   max[ivar] = -FLT_MAX; }
   for (UInt_t ivar=0; ivar<ntgts; ivar++) { tgmin[ivar] = FLT_MAX; tgmax[ivar] = -FLT_MAX; }
   for (UInt_t ivar=0; ivar<nvis ; ivar++) {  vmin[ivar] = FLT_MAX;  vmax[ivar] = -FLT_MAX; }

   // perform event loop

   for (Int_t i=0; i<ds->GetNEvents(); i++) {
      const Event * ev = ds->GetEvent(i);
      for (UInt_t ivar=0; ivar<nvar; ivar++) {
         Double_t v = ev->GetValue(ivar);
         if (v<min[ivar]) min[ivar] = v;
         if (v>max[ivar]) max[ivar] = v;
      }
      for (UInt_t itgt=0; itgt<ntgts; itgt++) {
         Double_t v = ev->GetTarget(itgt);
         if (v<tgmin[itgt]) tgmin[itgt] = v;
         if (v>tgmax[itgt]) tgmax[itgt] = v;
      }
      for (UInt_t ivis=0; ivis<nvis; ivis++) {
         Double_t v = ev->GetSpectator(ivis);
         if (v<vmin[ivis]) vmin[ivis] = v;
         if (v>vmax[ivis]) vmax[ivis] = v;
      }
   }

   for (UInt_t ivar=0; ivar<nvar; ivar++) {
      dsi.GetVariableInfo(ivar).SetMin(min[ivar]);
      dsi.GetVariableInfo(ivar).SetMax(max[ivar]);
      if( TMath::Abs(max[ivar]-min[ivar]) <= FLT_MIN )
         Log() << kWARNING << Form("Dataset[%s] : ",dsi.GetName()) << "Variable " << dsi.GetVariableInfo(ivar).GetExpression().Data() << " is constant. Please remove the variable." << Endl;
   }
   for (UInt_t ivar=0; ivar<ntgts; ivar++) {
      dsi.GetTargetInfo(ivar).SetMin(tgmin[ivar]);
      dsi.GetTargetInfo(ivar).SetMax(tgmax[ivar]);
      if( TMath::Abs(tgmax[ivar]-tgmin[ivar]) <= FLT_MIN )
         Log() << kFATAL << Form("Dataset[%s] : ",dsi.GetName()) << "Target " << dsi.GetTargetInfo(ivar).GetExpression().Data() << " is constant. Please remove the variable." << Endl;
   }
   for (UInt_t ivar=0; ivar<nvis; ivar++) {
      dsi.GetSpectatorInfo(ivar).SetMin(vmin[ivar]);
      dsi.GetSpectatorInfo(ivar).SetMax(vmax[ivar]);
      // if( TMath::Abs(vmax[ivar]-vmin[ivar]) <= FLT_MIN )
      //    Log() << kWARNING << "Spectator variable " << dsi.GetSpectatorInfo(ivar).GetExpression().Data() << " is constant." << Endl;
   }
   delete [] min;
   delete [] max;
   delete [] tgmin;
   delete [] tgmax;
   delete [] vmin;
   delete [] vmax;
}

////////////////////////////////////////////////////////////////////////////////
/// computes the correlation matrix of the input variables in the dataset
/// for the events of class "classNumber"

TMatrixD* TMVA::DataSetFactory::CalcCorrelationMatrix( DataSet* ds, const UInt_t classNumber )
{
   // first compute variance-covariance
   TMatrixD* mat = CalcCovarianceMatrix( ds, classNumber );

   // now the correlation
   UInt_t nvar = ds->GetNVariables(), ivar, jvar;

   for (ivar=0; ivar<nvar; ivar++) {
      for (jvar=0; jvar<nvar; jvar++) {
         if (ivar != jvar) {
            Double_t d = (*mat)(ivar, ivar)*(*mat)(jvar, jvar);
            if (d > 0) (*mat)(ivar, jvar) /= sqrt(d);
            else {
               Log() << kWARNING << Form("Dataset[%s] : ",DataSetInfo().GetName())<< "<GetCorrelationMatrix> Zero variances for variables "
                     << "(" << ivar << ", " << jvar << ") = " << d
                     << Endl;
               (*mat)(ivar, jvar) = 0;
            }
         }
      }
   }

   for (ivar=0; ivar<nvar; ivar++) (*mat)(ivar, ivar) = 1.0;

   return mat;
}
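
// In formula form, the loop above computes, for i != j,
//
//    rho(i,j) = C(i,j) / sqrt( C(i,i) * C(j,j) )
//
// where C is the covariance matrix returned by CalcCovarianceMatrix(), i.e.
// in code: (*mat)(i,j) /= TMath::Sqrt( (*mat)(i,i) * (*mat)(j,j) );
// with the diagonal set to 1 afterwards.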

////////////////////////////////////////////////////////////////////////////////
/// compute covariance matrix

TMatrixD* TMVA::DataSetFactory::CalcCovarianceMatrix( DataSet* ds, const UInt_t classNumber )
{
   UInt_t nvar = ds->GetNVariables();
   UInt_t ivar = 0, jvar = 0;

   TMatrixD* mat = new TMatrixD( nvar, nvar );

   // init matrices
   TVectorD vec(nvar);
   TMatrixD mat2(nvar, nvar);
   for (ivar=0; ivar<nvar; ivar++) {
      vec(ivar) = 0;
      for (jvar=0; jvar<nvar; jvar++) mat2(ivar, jvar) = 0;
   }

   // perform event loop
   Double_t ic = 0;
   for (Int_t i=0; i<ds->GetNEvents(); i++) {

      const Event * ev = ds->GetEvent(i);
      if (ev->GetClass() != classNumber ) continue;

      Double_t weight = ev->GetWeight();
      ic += weight; // count used events

      for (ivar=0; ivar<nvar; ivar++) {

         Double_t xi = ev->GetValue(ivar);
         vec(ivar) += xi*weight;
         mat2(ivar, ivar) += (xi*xi*weight);

         for (jvar=ivar+1; jvar<nvar; jvar++) {
            Double_t xj = ev->GetValue(jvar);
            mat2(ivar, jvar) += (xi*xj*weight);
         }
      }
   }

   for (ivar=0; ivar<nvar; ivar++)
      for (jvar=ivar+1; jvar<nvar; jvar++)
         mat2(jvar, ivar) = mat2(ivar, jvar); // symmetric matrix

   // variance-covariance
   for (ivar=0; ivar<nvar; ivar++) {
      for (jvar=0; jvar<nvar; jvar++) {
         (*mat)(ivar, jvar) = mat2(ivar, jvar)/ic - vec(ivar)*vec(jvar)/(ic*ic);
      }
   }

   return mat;
}
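
// Written out, with W = sum_k w_k the summed event weights of the class, the
// loop accumulates S1(i) = sum_k w_k x_ik and S2(i,j) = sum_k w_k x_ik x_jk,
// and the covariance is then
//
//    C(i,j) = S2(i,j)/W - S1(i)*S1(j)/W^2
//
// i.e. the weighted version of E[x_i x_j] - E[x_i] E[x_j].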

// --------------------------------------- new versions

////////////////////////////////////////////////////////////////////////////////
/// the dataset splitting

void
TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi,
                                   EvtStatsPerClass& nEventRequests,
                                   TString& normMode,
                                   UInt_t& splitSeed,
                                   TString& splitMode,
                                   TString& mixMode)
{
   Configurable splitSpecs( dsi.GetSplitOptions() );
   splitSpecs.SetConfigName("DataSetFactory");
   splitSpecs.SetConfigDescription( "Configuration options given in the \"PrepareForTrainingAndTesting\" call; these options define the creation of the data sets used for training and expert validation by TMVA" );

   splitMode = "Random";    // the splitting mode
   splitSpecs.DeclareOptionRef( splitMode, "SplitMode",
                                "Method of picking training and testing events (default: random)" );
   splitSpecs.AddPreDefVal(TString("Random"));
   splitSpecs.AddPreDefVal(TString("Alternate"));
   splitSpecs.AddPreDefVal(TString("Block"));

   mixMode = "SameAsSplitMode";    // the mixing mode
   splitSpecs.DeclareOptionRef( mixMode, "MixMode",
                                "Method of mixing events of different classes into one dataset (default: SameAsSplitMode)" );
   splitSpecs.AddPreDefVal(TString("SameAsSplitMode"));
   splitSpecs.AddPreDefVal(TString("Random"));
   splitSpecs.AddPreDefVal(TString("Alternate"));
   splitSpecs.AddPreDefVal(TString("Block"));

   splitSeed = 100;
   splitSpecs.DeclareOptionRef( splitSeed, "SplitSeed",
                                "Seed for random event shuffling" );

   normMode = "EqualNumEvents";  // the weight normalisation modes
   splitSpecs.DeclareOptionRef( normMode, "NormMode",
                                "Overall renormalisation of event-by-event weights used in the training (NumEvents: average weight of 1 per event, independently for signal and background; EqualNumEvents: average weight of 1 per event for signal, and sum of weights for background equal to sum of weights for signal)" );
   splitSpecs.AddPreDefVal(TString("None"));
   splitSpecs.AddPreDefVal(TString("NumEvents"));
   splitSpecs.AddPreDefVal(TString("EqualNumEvents"));

   splitSpecs.DeclareOptionRef(fScaleWithPreselEff=kFALSE,"ScaleWithPreselEff","Scale the number of requested events by the eff. of the preselection cuts (or not)" );

   // the number of events

   // fill in the numbers
   for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) {
      TString clName = dsi.GetClassInfo(cl)->GetName();
      TString titleTrain = TString().Format("Number of training events of class %s (default: 0 = all)",clName.Data()).Data();
      TString titleTest  = TString().Format("Number of test events of class %s (default: 0 = all)",clName.Data()).Data();
      TString titleSplit = TString().Format("Split in training and test events of class %s (default: 0 = deactivated)",clName.Data()).Data();

      splitSpecs.DeclareOptionRef( nEventRequests.at(cl).nTrainingEventsRequested, TString("nTrain_")+clName, titleTrain );
      splitSpecs.DeclareOptionRef( nEventRequests.at(cl).nTestingEventsRequested , TString("nTest_")+clName , titleTest  );
      splitSpecs.DeclareOptionRef( nEventRequests.at(cl).TrainTestSplitRequested , TString("TrainTestSplit_")+clName , titleSplit );
   }

   splitSpecs.DeclareOptionRef( fVerbose, "V", "Verbosity (default: true)" );

   splitSpecs.DeclareOptionRef( fVerboseLevel=TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)" );
   splitSpecs.AddPreDefVal(TString("Debug"));
   splitSpecs.AddPreDefVal(TString("Verbose"));
   splitSpecs.AddPreDefVal(TString("Info"));

   fCorrelations = kTRUE;
   splitSpecs.DeclareOptionRef(fCorrelations, "Correlations", "Boolean to show correlation output (Default: true)");
   fComputeCorrelations = kTRUE;
   splitSpecs.DeclareOptionRef(fComputeCorrelations, "CalcCorrelations", "Compute correlations and also some variable statistics, e.g. min/max (Default: true )");

   splitSpecs.ParseOptions();
   splitSpecs.CheckForUnusedOptions();

   // output logging verbosity
   if (Verbose()) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Debug")   == 0) fLogger->SetMinType( kDEBUG );
   if (fVerboseLevel.CompareTo("Verbose") == 0) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Info")    == 0) fLogger->SetMinType( kINFO );

   // put all to upper case
   splitMode.ToUpper(); mixMode.ToUpper(); normMode.ToUpper();
   // adjust mixmode if same as splitmode option has been set
   Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
         << "\tSplitmode is: \"" << splitMode << "\" the mixmode is: \"" << mixMode << "\"" << Endl;
   if (mixMode=="SAMEASSPLITMODE") mixMode = splitMode;
   else if (mixMode!=splitMode)
      Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "DataSet splitmode="<<splitMode
            <<" differs from mixmode="<<mixMode<<Endl;
}
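
// These options arrive as one colon-separated string via
// DataSetInfo::GetSplitOptions(); a typical value (user-side sketch, assuming
// the standard TMVA workflow where this string is the second argument of
// PrepareTrainingAndTestTree()) is
//
//    "nTrain_Signal=1000:nTest_Signal=1000:SplitMode=Random:SplitSeed=100:NormMode=NumEvents:!V"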

////////////////////////////////////////////////////////////////////////////////
/// build empty event vectors;
/// distributes events between kTraining/kTesting/kMaxTreeType

void
TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi,
                                        TMVA::DataInputHandler& dataInput,
                                        EventVectorOfClassesOfTreeType& eventsmap,
                                        EvtStatsPerClass& eventCounts)
{
   const UInt_t nclasses = dsi.GetNClasses();

   eventsmap[ Types::kTraining ]    = EventVectorOfClasses(nclasses);
   eventsmap[ Types::kTesting ]     = EventVectorOfClasses(nclasses);
   eventsmap[ Types::kMaxTreeType ] = EventVectorOfClasses(nclasses);

   // create the type, weight and boostweight branches
   const UInt_t nvars = dsi.GetNVariables();
   const UInt_t ntgts = dsi.GetNTargets();
   const UInt_t nvis  = dsi.GetNSpectators();

   for (size_t i=0; i<nclasses; i++) {
      eventCounts[i].varAvLength = new Float_t[nvars];
      for (UInt_t ivar=0; ivar<nvars; ivar++)
         eventCounts[i].varAvLength[ivar] = 0;
   }

   //Bool_t haveArrayVariable = kFALSE;
   //Bool_t *varIsArray = new Bool_t[nvars];

   // If there are NaNs in the tree:
   // => warn if used variables/cuts/weights contain nan (no problem if event is cut out)
   // => fatal if cut value is nan or (event not cut out and nans somewhere)
   // Count & collect all these warnings/errors and output them at the end.
   std::map<TString, int> nanInfWarnings;
   std::map<TString, int> nanInfErrors;

   // if we work with chains we need to remember the current tree: if
   // the chain jumps to a new tree we have to reset the formulas
   for (UInt_t cl=0; cl<nclasses; cl++) {

      //Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "Create training and testing trees -- looping over class \"" << dsi.GetClassInfo(cl)->GetName() << "\" ..." << Endl;

      EventStats& classEventCounts = eventCounts[cl];

      // info output for weights
      Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
            << "\tWeight expression for class \'" << dsi.GetClassInfo(cl)->GetName() << "\': \""
            << dsi.GetClassInfo(cl)->GetWeight() << "\"" << Endl;

      // used for chains only
      TString currentFileName("");

      std::vector<TreeInfo>::const_iterator treeIt(dataInput.begin(dsi.GetClassInfo(cl)->GetName()));
      for (;treeIt!=dataInput.end(dsi.GetClassInfo(cl)->GetName()); ++treeIt) {

         // read first the variables
         std::vector<Float_t> vars(nvars);
         std::vector<Float_t> tgts(ntgts);
         std::vector<Float_t> vis(nvis);
         TreeInfo currentInfo = *treeIt;

         Log() << kINFO << "Building event vectors for type " << currentInfo.GetTreeType() << " " << currentInfo.GetClassName() << Endl;

         EventVector& event_v = eventsmap[currentInfo.GetTreeType()].at(cl);

         Bool_t isChain = (TString("TChain") == currentInfo.GetTree()->ClassName());
         currentInfo.GetTree()->LoadTree(0);
         // create the TTreeFormulas to evaluate later on each single event
         ChangeToNewTree( currentInfo, dsi );

         // count number of events in tree before cut
         classEventCounts.nInitialEvents += currentInfo.GetTree()->GetEntries();

         // flag to control a warning message when the size of an array on disk is bigger than requested
         Bool_t foundLargerArraySize = kFALSE;

         // loop over events in ntuple
         const Long64_t nEvts = currentInfo.GetTree()->GetEntries();
         for (Long64_t evtIdx = 0; evtIdx < nEvts; evtIdx++) {
            currentInfo.GetTree()->LoadTree(evtIdx);

            // may need to reload tree in case of chains
            if (isChain) {
               if (currentInfo.GetTree()->GetTree()->GetDirectory()->GetFile()->GetName() != currentFileName) {
                  currentFileName = currentInfo.GetTree()->GetTree()->GetDirectory()->GetFile()->GetName();
                  ChangeToNewTree( currentInfo, dsi );
               }
            }
            currentInfo.GetTree()->GetEntry(evtIdx);
            Int_t sizeOfArrays = 1;
            Int_t prevArrExpr = 0;
            Bool_t haveAllArrayData = kFALSE;

            // ======= evaluate all formulas =================

            // First check whether some of the formulas are arrays.
            // This is the case when all inputs (variables, targets and spectators) are arrays;
            // a TMVA event is then not an event of the tree but an event + array index,
            // and we set the flag haveAllArrayData = true.
            // Otherwise we support arrays of variables where each element of the array
            // corresponds to a different variable, as in the case of images.
            // In that case the VariableInfo has a flag, IsVariableFromArray, that is set,
            // and we have a single formula for the whole array; fInputTableFormulas contains
            // the map of the formula and the variable index used to evaluate it.
            for (UInt_t ivar = 0; ivar < nvars; ivar++) {
               // distinguish the case where the variable is not from an array
               if (dsi.IsVariableFromArray(ivar)) continue;
               auto inputFormula = fInputTableFormulas[ivar].first;

               Int_t ndata = inputFormula->GetNdata();

               classEventCounts.varAvLength[ivar] += ndata;
               if (ndata == 1) continue;
               haveAllArrayData = kTRUE;
               //varIsArray[ivar] = kTRUE;
               //std::cout << "Found array !!!" << std::endl;
               if (sizeOfArrays == 1) {
                  sizeOfArrays = ndata;
                  prevArrExpr = ivar;
               }
               else if (sizeOfArrays!=ndata) {
                  Log() << kERROR << Form("Dataset[%s] : ",dsi.GetName())<< "ERROR while preparing training and testing trees:" << Endl;
                  Log() << Form("Dataset[%s] : ",dsi.GetName())<< "  multiple array-type expressions of different length were encountered" << Endl;
                  Log() << Form("Dataset[%s] : ",dsi.GetName())<< "  location of error: event " << evtIdx
                        << " in tree " << currentInfo.GetTree()->GetName()
                        << " of file " << currentInfo.GetTree()->GetCurrentFile()->GetName() << Endl;
                  Log() << Form("Dataset[%s] : ",dsi.GetName())<< "  expression " << inputFormula->GetTitle() << " has "
                        << Form("Dataset[%s] : ",dsi.GetName()) << ndata << " entries, while" << Endl;
                  Log() << Form("Dataset[%s] : ",dsi.GetName())<< "  expression " << fInputTableFormulas[prevArrExpr].first->GetTitle() << " has "
                        << Form("Dataset[%s] : ",dsi.GetName())<< fInputTableFormulas[prevArrExpr].first->GetNdata() << " entries" << Endl;
                  Log() << kFATAL << Form("Dataset[%s] : ",dsi.GetName())<< "Need to abort" << Endl;
               }
            }

            // now we read the information
            for (Int_t idata = 0; idata<sizeOfArrays; idata++) {
               Bool_t contains_NaN_or_inf = kFALSE;

               auto checkNanInf = [&](std::map<TString, int> &msgMap, Float_t value, const char *what, const char *formulaTitle) {
                  if (TMath::IsNaN(value)) {
                     contains_NaN_or_inf = kTRUE;
                     ++msgMap[TString::Format("Dataset[%s] : %s expression resolves to indeterminate value (NaN): %s", dsi.GetName(), what, formulaTitle)];
                  } else if (!TMath::Finite(value)) {
                     contains_NaN_or_inf = kTRUE;
                     ++msgMap[TString::Format("Dataset[%s] : %s expression resolves to infinite value (+inf or -inf): %s", dsi.GetName(), what, formulaTitle)];
                  }
               };

               TTreeFormula* formula = 0;

               // the cut expression
               Double_t cutVal = 1.;
               formula = fCutFormulas[cl];
               if (formula) {
                  Int_t ndata = formula->GetNdata();
                  cutVal = (ndata==1 ?
                            formula->EvalInstance(0) :
                            formula->EvalInstance(idata));
                  checkNanInf(nanInfErrors, cutVal, "Cut", formula->GetTitle());
               }

               // if the event is cut out, add the message to the warnings, else to the errors
               auto &nanMessages = cutVal < 0.5 ? nanInfWarnings : nanInfErrors;

               // the input variables
               for (UInt_t ivar=0; ivar<nvars; ivar++) {
                  auto formulaMap = fInputTableFormulas[ivar];
                  formula = formulaMap.first;
                  int inputVarIndex = formulaMap.second;
                  // check the formula's ndata size (in case of array variables);
                  // checking for inputVarIndex == 0 is enough, since the formula is the same.
                  // This check might take some time; maybe do it only in debug mode.
                  if (inputVarIndex == 0 && dsi.IsVariableFromArray(ivar)) {
                     Int_t ndata = formula->GetNdata();
                     Int_t arraySize = dsi.GetVarArraySize(dsi.GetVariableInfo(ivar).GetExpression());
                     if (ndata < arraySize) {
                        Log() << kFATAL << "Size of array " << dsi.GetVariableInfo(ivar).GetExpression()
                              << " in the current tree " << currentInfo.GetTree()->GetName() << " for the event " << evtIdx
                              << " is " << ndata << " instead of " << arraySize << Endl;
                     } else if (ndata > arraySize && !foundLargerArraySize) {
                        Log() << kWARNING << "Size of array " << dsi.GetVariableInfo(ivar).GetExpression()
                              << " in the current tree " << currentInfo.GetTree()->GetName() << " for the event "
                              << evtIdx << " is " << ndata << ", larger than " << arraySize << Endl;
                        Log() << kWARNING << "Some data will then be ignored. This WARNING is printed only once, "
                              << " check in case for the other variables and events " << Endl;
                        // note that following warnings will be suppressed
                        foundLargerArraySize = kTRUE;
                     }
                  }
                  formula->SetQuickLoad(true); // is this needed ???

                  vars[ivar] = ( !haveAllArrayData ?
                                 formula->EvalInstance(inputVarIndex) :
                                 formula->EvalInstance(idata));
                  checkNanInf(nanMessages, vars[ivar], "Input", formula->GetTitle());
               }

               // the targets
               for (UInt_t itrgt=0; itrgt<ntgts; itrgt++) {
                  formula = fTargetFormulas[itrgt];
                  Int_t ndata = formula->GetNdata();
                  tgts[itrgt] = (ndata == 1 ?
                                 formula->EvalInstance(0) :
                                 formula->EvalInstance(idata));
                  checkNanInf(nanMessages, tgts[itrgt], "Target", formula->GetTitle());
               }

               // the spectators
               for (UInt_t itVis=0; itVis<nvis; itVis++) {
                  formula = fSpectatorFormulas[itVis];
                  Int_t ndata = formula->GetNdata();
                  vis[itVis] = (ndata == 1 ?
                                formula->EvalInstance(0) :
                                formula->EvalInstance(idata));
                  checkNanInf(nanMessages, vis[itVis], "Spectator", formula->GetTitle());
               }


               // the weight
               Float_t weight = currentInfo.GetWeight(); // multiply by tree weight
               formula = fWeightFormula[cl];
               if (formula!=0) {
                  Int_t ndata = formula->GetNdata();
                  weight *= (ndata == 1 ?
                             formula->EvalInstance() :
                             formula->EvalInstance(idata));
                  checkNanInf(nanMessages, weight, "Weight", formula->GetTitle());
               }

               // Count the events before rejection due to cut or NaN
               // value (weighted and unweighted)
               classEventCounts.nEvBeforeCut++;
               if (!TMath::IsNaN(weight))
                  classEventCounts.nWeEvBeforeCut += weight;

               // apply the cut, skip rest if cut is not fulfilled
               if (cutVal<0.5) continue;

               // global flag if negative weights exist -> can be used
               // by classifiers who may require special data
               // treatment (also print warning)
               if (weight < 0) classEventCounts.nNegWeights++;

               // now read the event-values (variables and regression targets)

               if (contains_NaN_or_inf) {
                  Log() << kWARNING << Form("Dataset[%s] : ",dsi.GetName())<< "NaN or +-inf in Event " << evtIdx << Endl;
                  if (sizeOfArrays>1) Log() << kWARNING << Form("Dataset[%s] : ",dsi.GetName())<< " rejected" << Endl;
                  continue;
               }

               // Count the events after rejection due to cut or NaN value
               // (weighted and unweighted)
               classEventCounts.nEvAfterCut++;
               classEventCounts.nWeEvAfterCut += weight;

               // event accepted, fill temporary ntuple
               event_v.push_back(new Event(vars, tgts , vis, cl , weight));
            }
         }
         currentInfo.GetTree()->ResetBranchAddresses();
      }
   }

   if (!nanInfWarnings.empty()) {
      Log() << kWARNING << "Found events with NaN and/or +-inf values" << Endl;
      for (const auto &warning : nanInfWarnings) {
         auto &log = Log() << kWARNING << warning.first;
         if (warning.second > 1) log << " (" << warning.second << " times)";
         log << Endl;
      }
      Log() << kWARNING << "These NaN and/or +-infs were all removed by the specified cut, continuing." << Endl;
      Log() << Endl;
   }

   if (!nanInfErrors.empty()) {
      Log() << kWARNING << "Found events with NaN and/or +-inf values (not removed by cut)" << Endl;
      for (const auto &error : nanInfErrors) {
         auto &log = Log() << kWARNING << error.first;
         if (error.second > 1) log << " (" << error.second << " times)";
         log << Endl;
      }
      Log() << kFATAL << "How am I supposed to train a NaN or +-inf?!" << Endl;
   }

   // for output format, get the maximum class name length
   Int_t maxL = dsi.GetClassNameMaxLength();

   Log() << kHEADER << Form("[%s] : ",dsi.GetName()) << "Number of events in input trees" << Endl;
   Log() << kDEBUG << "(after possible flattening of arrays):" << Endl;


   for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) {
      Log() << kDEBUG //<< Form("[%s] : ",dsi.GetName())
            << "    "
            << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
            << "      -- number of events       : "
            << std::setw(5) << eventCounts[cl].nEvBeforeCut
            << "  / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvBeforeCut << Endl;
   }

   for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) {
      Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
            << " " << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
            <<" tree -- total number of entries: "
            << std::setw(5) << dataInput.GetEntries(dsi.GetClassInfo(cl)->GetName()) << Endl;
   }

   if (fScaleWithPreselEff)
      Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
            << "\tPreselection: (will affect number of requested training and testing events)" << Endl;
   else
      Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
            << "\tPreselection: (will NOT affect number of requested training and testing events)" << Endl;

   if (dsi.HasCuts()) {
      for (UInt_t cl = 0; cl< dsi.GetNClasses(); cl++) {
         Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "    " << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
               << " requirement: \"" << dsi.GetClassInfo(cl)->GetCut() << "\"" << Endl;
         Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "    "
               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
               << "      -- number of events passed: "
               << std::setw(5) << eventCounts[cl].nEvAfterCut
               << "  / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvAfterCut << Endl;
         Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "    "
               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
               << "      -- efficiency             : "
               << std::setw(6) << eventCounts[cl].nWeEvAfterCut/eventCounts[cl].nWeEvBeforeCut << Endl;
      }
   }
   else Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
              << "    No preselection cuts applied on event classes" << Endl;

   //delete[] varIsArray;

}

////////////////////////////////////////////////////////////////////////////////
/// Select and distribute unassigned events to kTraining and kTesting

TMVA::DataSet*
TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi,
                                 EventVectorOfClassesOfTreeType& tmpEventVector,
                                 EvtStatsPerClass& eventCounts,
                                 const TString& splitMode,
                                 const TString& mixMode,
                                 const TString& normMode,
                                 UInt_t splitSeed)
{
   TMVA::RandomGenerator<TRandom3> rndm(splitSeed);

   // ==== splitting of undefined events to kTraining and kTesting

   // if splitMode contains "RANDOM", then shuffle the undefined events
   if (splitMode.Contains( "RANDOM" ) /*&& !emptyUndefined*/ ) {
      // random shuffle the undefined events of each class
      for( UInt_t cls = 0; cls < dsi.GetNClasses(); ++cls ){
         EventVector& unspecifiedEvents = tmpEventVector[Types::kMaxTreeType].at(cls);
         if( ! unspecifiedEvents.empty() ) {
            Log() << kDEBUG << "randomly shuffling "
                  << unspecifiedEvents.size()
                  << " events of class " << cls
                  << " which are not yet associated to testing or training" << Endl;
            std::shuffle(unspecifiedEvents.begin(), unspecifiedEvents.end(), rndm);
         }
      }
   }

   // check for each class the number of training and testing events, the requested number and the available number
   Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "SPLITTING ========" << Endl;
   for( UInt_t cls = 0; cls < dsi.GetNClasses(); ++cls ){
      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "---- class " << cls << Endl;
      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "check number of training/testing events, requested and available number of events for class " << cls << Endl;

      // check if enough or too many events are already in the training/testing eventvectors of the class cls
      EventVector& eventVectorTraining  = tmpEventVector[ Types::kTraining ].at(cls);
      EventVector& eventVectorTesting   = tmpEventVector[ Types::kTesting ].at(cls);
      EventVector& eventVectorUndefined = tmpEventVector[ Types::kMaxTreeType ].at(cls);

      Int_t availableTraining  = eventVectorTraining.size();
      Int_t availableTesting   = eventVectorTesting.size();
      Int_t availableUndefined = eventVectorUndefined.size();

      Float_t presel_scale;
      if (fScaleWithPreselEff) {
         presel_scale = eventCounts[cls].cutScaling();
         if (presel_scale < 1)
            Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << " you have opted for scaling the number of requested training/testing events\n to be scaled by the preselection efficiency"<< Endl;
      }else{
         presel_scale = 1.; // this scaling was too confusing to most people, including me! Sorry... (Helge)
         if (eventCounts[cls].cutScaling() < 1)
            Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << " you have opted for interpreting the requested number of training/testing events\n to be the number of events AFTER your preselection cuts" << Endl;

      }
      // If TrainTestSplit_<class> is set, set the number of requested training events to split*num_all_events.
      // The requested number of testing events is set to zero and therefore takes all other events.
      // The option TrainTestSplit_<class> overrides nTrain_<class> or nTest_<class>.
      if(eventCounts[cls].TrainTestSplitRequested < 1.0 && eventCounts[cls].TrainTestSplitRequested > 0.0){
         eventCounts[cls].nTrainingEventsRequested = Int_t(eventCounts[cls].TrainTestSplitRequested*(availableTraining+availableTesting+availableUndefined));
         eventCounts[cls].nTestingEventsRequested = Int_t(0);
      }
      else if(eventCounts[cls].TrainTestSplitRequested != 0.0) Log() << kFATAL << Form("The option TrainTestSplit_<class> has to be in range (0, 1] but is set to %f.",eventCounts[cls].TrainTestSplitRequested) << Endl;
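      // Example: with TrainTestSplit_Signal=0.8 and 1000 available signal
      // events in total, 800 are requested for training; nTestingEventsRequested
      // stays 0, so the remaining 200 events fall into the test sample.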
      Int_t requestedTraining = Int_t(eventCounts[cls].nTrainingEventsRequested * presel_scale);
      Int_t requestedTesting  = Int_t(eventCounts[cls].nTestingEventsRequested  * presel_scale);

      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "events in training trees    : " << availableTraining << Endl;
      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "events in testing trees     : " << availableTesting << Endl;
      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "events in unspecified trees : " << availableUndefined << Endl;
      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "requested for training      : " << requestedTraining << Endl;

      if(presel_scale<1)
         Log() << "  ( " << eventCounts[cls].nTrainingEventsRequested
               << " * " << presel_scale << " preselection efficiency)" << Endl;
      else
         Log() << Endl;
      Log() << kDEBUG << "requested for testing       : " << requestedTesting;
      if(presel_scale<1)
         Log() << "  ( " << eventCounts[cls].nTestingEventsRequested
               << " * " << presel_scale << " preselection efficiency)" << Endl;
      else
         Log() << Endl;

      // nomenclature r  = available training
      //              s  = available testing
      //              u  = available undefined
      //              R  = requested training
      //              S  = requested testing
      //              nR = to be used to select training events
      //              nS = to be used to select test events
      // we have the constraint: nR + nS < r+s+u,
      // since we can not use more events than we have
      // free events: Nfree = u-Thet(R-r)-Thet(S-s)
      // nomenclature: Thet(x) = x, if x>0 else 0
      // nR = max(R,r) + 0.5 * Nfree
      // nS = max(S,s) + 0.5 * Nfree
      // nR + nS = R+S + u-R+r-S+s = u+r+s = ok! for R>r
      // nR + nS = r+S + u-S+s     = u+r+s = ok! for r>R

      // three different cases might occur here
      //
      // Case a
      // requestedTraining and requestedTesting > 0
      // free events: Nfree = u-Thet(R-r)-Thet(S-s)
      // nR = Max(R,r) + 0.5 * Nfree
      // nS = Max(S,s) + 0.5 * Nfree
      //
      // Case b
      // exactly one of requestedTraining or requestedTesting > 0
      // assume training R > 0
      // nR = max(R,r)
      // nS = s+u+r-nR
      // and s=nS
      //
      // Case c
      // requestedTraining=0, requestedTesting=0
      // Nfree = u-|r-s|
      // if NFree >= 0
      //    R = Max(r,s) + 0.5 * Nfree = S
      // else if r>s
      //    R = r; S=s+u
      // else
      //    R = r+u; S=s
      //
      // Next steps:
      // Determination of Event numbers R,S, nR, nS
      // distribute undefined events according to nR, nS
      // finally determine actual sub samples from nR and nS to be used in training / testing
      //

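      // Worked example for case c: r=100, s=40, u=200 and R=S=0. Then
      // Nfree = u - |r-s| = 200 - 60 = 140 >= 0, so both samples are balanced:
      // nR = nS = (r+s+u)/2 = 170, which the code below realises as
      // useForTraining = useForTesting = allAvailable/2.
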
      Int_t useForTesting(0),useForTraining(0);
      Int_t allAvailable(availableUndefined + availableTraining + availableTesting);

      if( (requestedTraining == 0) && (requestedTesting == 0)){

         // Case C: balance the number of training and testing events

         if ( availableUndefined >= TMath::Abs(availableTraining - availableTesting) ) {
            // enough unspecified are available to equal training and testing
            useForTraining = useForTesting = allAvailable/2;
         } else {
            // all unspecified are assigned to the smaller of training / testing
            useForTraining = availableTraining;
            useForTesting  = availableTesting;
            if (availableTraining < availableTesting)
               useForTraining += availableUndefined;
            else
               useForTesting += availableUndefined;
         }
         requestedTraining = useForTraining;
         requestedTesting  = useForTesting;
      }

      else if (requestedTesting == 0){
         // case B
         useForTraining = TMath::Max(requestedTraining,availableTraining);
         if (allAvailable < useForTraining) {
            Log() << kFATAL << Form("Dataset[%s] : ",dsi.GetName())<< "More events requested for training ("
                  << requestedTraining << ") than available ("
                  << allAvailable << ")!" << Endl;
         }
         useForTesting = allAvailable - useForTraining; // the rest
         requestedTesting = useForTesting;
      }

      else if (requestedTraining == 0){ // case B)
         useForTesting = TMath::Max(requestedTesting,availableTesting);
         if (allAvailable < useForTesting) {
            Log() << kFATAL << Form("Dataset[%s] : ",dsi.GetName())<< "More events requested for testing ("
                  << requestedTesting << ") than available ("
                  << allAvailable << ")!" << Endl;
         }
         useForTraining = allAvailable - useForTesting; // the rest
         requestedTraining = useForTraining;
      }

      else {
         // Case A
         // requestedTraining R and requestedTesting S > 0
         // free events: Nfree = u-Thet(R-r)-Thet(S-s)
         // nR = Max(R,r) + 0.5 * Nfree
         // nS = Max(S,s) + 0.5 * Nfree
         Int_t stillNeedForTraining = TMath::Max(requestedTraining-availableTraining,0);
         Int_t stillNeedForTesting  = TMath::Max(requestedTesting-availableTesting,0);

         int NFree = availableUndefined - stillNeedForTraining - stillNeedForTesting;
         if (NFree < 0) NFree = 0;
         useForTraining = TMath::Max(requestedTraining,availableTraining) + NFree/2;
         useForTesting  = allAvailable - useForTraining; // the rest
      }

      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "determined event sample size to select training sample from="<<useForTraining<<Endl;
      Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "determined event sample size to select test sample from="<<useForTesting<<Endl;



      // associate undefined events
      if( splitMode == "ALTERNATE" ){
         Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "split 'ALTERNATE'" << Endl;
         Int_t nTraining = availableTraining;
         for( EventVector::iterator it = eventVectorUndefined.begin(), itEnd = eventVectorUndefined.end(); it != itEnd; ){
            ++nTraining;
            if( nTraining <= requestedTraining ){
               eventVectorTraining.insert( eventVectorTraining.end(), (*it) );
               ++it;
            }
            if( it != itEnd ){
               eventVectorTesting.insert( eventVectorTesting.end(), (*it) );
               ++it;
            }
         }
      } else {
         Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "split '" << splitMode << "'" << Endl;

         // test if enough events are available
         Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "availableundefined : " << availableUndefined << Endl;
         Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "useForTraining     : " << useForTraining << Endl;
         Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "useForTesting      : " << useForTesting << Endl;
         Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "availableTraining  : " << availableTraining << Endl;
         Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "availableTesting   : " << availableTesting << Endl;

         if( availableUndefined<(useForTraining-availableTraining) ||
             availableUndefined<(useForTesting -availableTesting ) ||
             availableUndefined<(useForTraining+useForTesting-availableTraining-availableTesting ) ){
            Log() << kFATAL << Form("Dataset[%s] : ",dsi.GetName())<< "More events requested than available!" << Endl;
         }

         // select the events
         if (useForTraining>availableTraining){
            eventVectorTraining.insert( eventVectorTraining.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTraining- availableTraining );
            eventVectorUndefined.erase( eventVectorUndefined.begin(), eventVectorUndefined.begin() + useForTraining- availableTraining);
         }
         if (useForTesting>availableTesting){
            eventVectorTesting.insert( eventVectorTesting.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTesting- availableTesting );
         }
      }
      eventVectorUndefined.clear();
1299
1300 // finally shorten the event vectors to the requested size by removing random events
1301 if (splitMode.Contains( "RANDOM" )){
1302 UInt_t sizeTraining = eventVectorTraining.size();
1303 if( sizeTraining > UInt_t(requestedTraining) ){
1304 std::vector<UInt_t> indicesTraining( sizeTraining );
1305 // make indices
1306 std::generate( indicesTraining.begin(), indicesTraining.end(), TMVA::Increment<UInt_t>(0) );
1307 // shuffle indices
1308 std::shuffle(indicesTraining.begin(), indicesTraining.end(), rndm);
1309 // erase indices of not needed events
1310 indicesTraining.erase( indicesTraining.begin()+sizeTraining-UInt_t(requestedTraining), indicesTraining.end() );
1311 // delete all events with the given indices
1312 for( std::vector<UInt_t>::iterator it = indicesTraining.begin(), itEnd = indicesTraining.end(); it != itEnd; ++it ){
1313 delete eventVectorTraining.at( (*it) ); // delete event
1314 eventVectorTraining.at( (*it) ) = NULL; // set pointer to NULL
1315 }
1316 // now remove and erase all events with pointer==NULL
1317 eventVectorTraining.erase( std::remove( eventVectorTraining.begin(), eventVectorTraining.end(), (void*)NULL ), eventVectorTraining.end() );
1318 }
1319
1320 UInt_t sizeTesting = eventVectorTesting.size();
1321 if( sizeTesting > UInt_t(requestedTesting) ){
1322 std::vector<UInt_t> indicesTesting( sizeTesting );
1323 // make indices
1324 std::generate( indicesTesting.begin(), indicesTesting.end(), TMVA::Increment<UInt_t>(0) );
1325 // shuffle indices
1326 std::shuffle(indicesTesting.begin(), indicesTesting.end(), rndm);
1327 // erase indices of not needed events
1328 indicesTesting.erase( indicesTesting.begin()+sizeTesting-UInt_t(requestedTesting), indicesTesting.end() );
1329 // delete all events with the given indices
1330 for( std::vector<UInt_t>::iterator it = indicesTesting.begin(), itEnd = indicesTesting.end(); it != itEnd; ++it ){
1331 delete eventVectorTesting.at( (*it) ); // delete event
1332 eventVectorTesting.at( (*it) ) = NULL; // set pointer to NULL
1333 }
1334 // now remove and erase all events with pointer==NULL
1335 eventVectorTesting.erase( std::remove( eventVectorTesting.begin(), eventVectorTesting.end(), (void*)NULL ), eventVectorTesting.end() );
1336 }
1337 }
1338 else { // erase at end if size larger than requested
1339 if( eventVectorTraining.size() < UInt_t(requestedTraining) )
1340 Log() << kWARNING << Form("Dataset[%s] : ",dsi.GetName())<< "DataSetFactory/requested number of training samples larger than size of eventVectorTraining.\n"
1341 << "There is probably an issue. Please contact the TMVA developers." << Endl;
1342 else if (eventVectorTraining.size() > UInt_t(requestedTraining)) {
1343 std::for_each( eventVectorTraining.begin()+requestedTraining, eventVectorTraining.end(), DeleteFunctor<Event>() );
1344 eventVectorTraining.erase(eventVectorTraining.begin()+requestedTraining,eventVectorTraining.end());
1345 }
1346 if( eventVectorTesting.size() < UInt_t(requestedTesting) )
1347 Log() << kWARNING << Form("Dataset[%s] : ",dsi.GetName())<< "DataSetFactory: requested number of testing samples is larger than the size of eventVectorTesting.\n"
1348 << "There is probably an issue. Please contact the TMVA developers." << Endl;
1349 else if ( eventVectorTesting.size() > UInt_t(requestedTesting) ) {
1350 std::for_each( eventVectorTesting.begin()+requestedTesting, eventVectorTesting.end(), DeleteFunctor<Event>() );
1351 eventVectorTesting.erase(eventVectorTesting.begin()+requestedTesting,eventVectorTesting.end());
1352 }
1353 }
1354 }
1355
1356 TMVA::DataSetFactory::RenormEvents( dsi, tmpEventVector, eventCounts, normMode );
1357
1358 Int_t trainingSize = 0;
1359 Int_t testingSize = 0;
1360
1361 // sum up number of training and testing events
1362 for( UInt_t cls = 0; cls < dsi.GetNClasses(); ++cls ){
1363 trainingSize += tmpEventVector[Types::kTraining].at(cls).size();
1364 testingSize += tmpEventVector[Types::kTesting].at(cls).size();
1365 }
1366
1367 // --- collect all training (testing) events into the training (testing) eventvector
1368
1369 // create the event vectors and reserve enough space
1370 EventVector* trainingEventVector = new EventVector();
1371 EventVector* testingEventVector = new EventVector();
1372
1373 trainingEventVector->reserve( trainingSize );
1374 testingEventVector->reserve( testingSize );
1375
1376
1377 // collect the events
1378
1379 // mixing of kTraining and kTesting data sets
1380 Log() << kDEBUG << " MIXING ============= " << Endl;
1381
1382 if( mixMode == "ALTERNATE" ){
1383 // Inform the user when the alternate mix mode is used with
1384 // event classes of different sizes; this works, but the alternation stops at the last event of the smaller class
1385 for( UInt_t cls = 1; cls < dsi.GetNClasses(); ++cls ){
1386 if (tmpEventVector[Types::kTraining].at(cls).size() != tmpEventVector[Types::kTraining].at(0).size()){
1387 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "Training sample: You are trying to mix events in alternate mode although the classes have different numbers of events. This works, but the alternation stops at the last event of the smaller class."<<Endl;
1388 }
1389 if (tmpEventVector[Types::kTesting].at(cls).size() != tmpEventVector[Types::kTesting].at(0).size()){
1390 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "Testing sample: You are trying to mix events in alternate mode although the classes have different numbers of events. This works, but the alternation stops at the last event of the smaller class."<<Endl;
1391 }
1392 }
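// With two classes of equal size the resulting order is: class0, class1,
// class0, class1, ... ; surplus events of a larger class are appended unmixed
// (see the "fill in the rest without mixing" branch below).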
1393 typedef EventVector::iterator EvtVecIt;
1394 EvtVecIt itEvent, itEventEnd;
1395
1396 // insert first class
1397 Log() << kDEBUG << "insert class 0 into training and test vector" << Endl;
1398 trainingEventVector->insert( trainingEventVector->end(), tmpEventVector[Types::kTraining].at(0).begin(), tmpEventVector[Types::kTraining].at(0).end() );
1399 testingEventVector->insert( testingEventVector->end(), tmpEventVector[Types::kTesting].at(0).begin(), tmpEventVector[Types::kTesting].at(0).end() );
1400
1401 // insert other classes
1402 EvtVecIt itTarget;
1403 for( UInt_t cls = 1; cls < dsi.GetNClasses(); ++cls ){
1404 Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "insert class " << cls << Endl;
1405 // training vector
1406 itTarget = trainingEventVector->begin() - 1; // start one before begin (note: forming begin()-1 is formally undefined behaviour; the code relies on the reserve() calls above so that the inserts below do not reallocate)
1407 // loop over source
1408 for( itEvent = tmpEventVector[Types::kTraining].at(cls).begin(), itEventEnd = tmpEventVector[Types::kTraining].at(cls).end(); itEvent != itEventEnd; ++itEvent ){
1409 // if( std::distance( itTarget, trainingEventVector->end()) < Int_t(cls+1) ) {
1410 if( (trainingEventVector->end() - itTarget) < Int_t(cls+1) ) {
1411 itTarget = trainingEventVector->end();
1412 trainingEventVector->insert( itTarget, itEvent, itEventEnd ); // fill in the rest without mixing
1413 break;
1414 }else{
1415 itTarget += cls+1;
1416 trainingEventVector->insert( itTarget, (*itEvent) ); // fill event
1417 }
1418 }
1419 // testing vector
1420 itTarget = testingEventVector->begin() - 1; // start one before begin (see note above)
1421 // loop over source
1422 for( itEvent = tmpEventVector[Types::kTesting].at(cls).begin(), itEventEnd = tmpEventVector[Types::kTesting].at(cls).end(); itEvent != itEventEnd; ++itEvent ){
1423 // if( std::distance( itTarget, testingEventVector->end()) < Int_t(cls+1) ) {
1424 if( ( testingEventVector->end() - itTarget ) < Int_t(cls+1) ) {
1425 itTarget = testingEventVector->end();
1426 testingEventVector->insert( itTarget, itEvent, itEventEnd ); // fill in the rest without mixing
1427 break;
1428 }else{
1429 itTarget += cls+1;
1430 testingEventVector->insert( itTarget, (*itEvent) ); // fill event
1431 }
1432 }
1433 }
1434 }else{
1435 for( UInt_t cls = 0; cls < dsi.GetNClasses(); ++cls ){
1436 trainingEventVector->insert( trainingEventVector->end(), tmpEventVector[Types::kTraining].at(cls).begin(), tmpEventVector[Types::kTraining].at(cls).end() );
1437 testingEventVector->insert ( testingEventVector->end(), tmpEventVector[Types::kTesting].at(cls).begin(), tmpEventVector[Types::kTesting].at(cls).end() );
1438 }
1439 }
1440 // delete the tmpEventVector (but not the events therein)
1441 tmpEventVector[Types::kTraining].clear();
1442 tmpEventVector[Types::kTesting].clear();
1443
1444 tmpEventVector[Types::kMaxTreeType].clear();
1445
1446 if (mixMode == "RANDOM") {
1447 Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "shuffling events"<<Endl;
1448
1449 std::shuffle(trainingEventVector->begin(), trainingEventVector->end(), rndm);
1450 std::shuffle(testingEventVector->begin(), testingEventVector->end(), rndm);
1451 }
1452
1453 Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "trainingEventVector " << trainingEventVector->size() << Endl;
1454 Log() << kDEBUG << Form("Dataset[%s] : ",dsi.GetName())<< "testingEventVector " << testingEventVector->size() << Endl;
1455
1456 // create dataset
1457 DataSet* ds = new DataSet(dsi);
1458
1459 // Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "Create internal training tree" << Endl;
1460 ds->SetEventCollection(trainingEventVector, Types::kTraining );
1461 // Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "Create internal testing tree" << Endl;
1462 ds->SetEventCollection(testingEventVector, Types::kTesting );
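// SetEventCollection copies the vector contents into the DataSet, which is why
// the two temporary vectors can be deleted below without touching the events themselves.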
1463
1464
1465 if (ds->GetNTrainingEvents() < 1){
1466 Log() << kFATAL << "Dataset " << std::string(dsi.GetName()) << " does not have any training events; I had better stop here and let you fix that first " << Endl;
1467 }
1468
1469 if (ds->GetNTestEvents() < 1) {
1470 Log() << kERROR << "Dataset " << std::string(dsi.GetName()) << " does not have any testing events; this will likely cause problems later, but for now I continue " << Endl;
1471 }
1472
1473 delete trainingEventVector;
1474 delete testingEventVector;
1475 return ds;
1476
1477}
1478
1479////////////////////////////////////////////////////////////////////////////////
1480 /// renormalisation of the TRAINING event weights
1481 /// - none: use the weights as supplied by the user
1482 /// (we store, however, the relative weight for later use)
1483 /// - numEvents: reweight each class such that its sum of training weights equals the number of training events of that class
1484 /// - equalNumEvents: reweight the training events such that the sum of all
1485 /// backgr. (class > 0) weights equals that of the signal (class 0)
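///
/// A minimal user-side sketch of how a mode is typically selected (here `factory`
/// is assumed to be a TMVA::Factory; the option string is the one parsed by
/// PrepareTrainingAndTestTree, as referenced further below):
/// ~~~{.cpp}
/// factory->PrepareTrainingAndTestTree( "",
///     "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents" );
/// ~~~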
1486
1487 void
1488 TMVA::DataSetFactory::RenormEvents( DataSetInfo& dsi,
1489                                     EventVectorOfClassesOfTreeType& tmpEventVector,
1490 const EvtStatsPerClass& eventCounts,
1491 const TString& normMode )
1492{
1493
1494
1495 // print rescaling info
1496 // ---------------------------------
1497 // compute sums of weights
1498 ValuePerClass trainingSumWeightsPerClass( dsi.GetNClasses() );
1499 ValuePerClass testingSumWeightsPerClass( dsi.GetNClasses() );
1500
1501 NumberPerClass trainingSizePerClass( dsi.GetNClasses() );
1502 NumberPerClass testingSizePerClass( dsi.GetNClasses() );
1503
1504 Double_t trainingSumSignalWeights = 0;
1505 Double_t trainingSumBackgrWeights = 0; // Backgr. includes all classes that are not signal
1506 Double_t testingSumSignalWeights = 0;
1507 Double_t testingSumBackgrWeights = 0; // Backgr. includes all classes that are not signal
1508
1509
1510
1511 for( UInt_t cls = 0, clsEnd = dsi.GetNClasses(); cls < clsEnd; ++cls ){
1512 trainingSizePerClass.at(cls) = tmpEventVector[Types::kTraining].at(cls).size();
1513 testingSizePerClass.at(cls) = tmpEventVector[Types::kTesting].at(cls).size();
1514
1515 // the functional solution
1516 // sum up the weights in Double_t although the individual weights are Float_t, to prevent rounding issues when adding floating-point numbers
1517 //
1518 // accumulate --> does what the name says
1519 // begin() and end() denote the range of the vector to be accumulated
1520 // Double_t(0) tells accumulate the type and the starting value
1521 // the lambda takes the running sum and the current event and returns
1522 // the running sum increased by the event's original weight
1523 // (this replaces the compose_binary/null<> functor construction used previously)
1524 //
1525 // all together this sums up the event weights of all events in the vector and returns the total
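// e.g. three events with original weights {1.0, 2.0, 0.5} accumulate to Double_t(3.5)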
1526 trainingSumWeightsPerClass.at(cls) =
1527 std::accumulate(tmpEventVector[Types::kTraining].at(cls).begin(),
1528 tmpEventVector[Types::kTraining].at(cls).end(),
1529 Double_t(0), [](Double_t w, const TMVA::Event *E) { return w + E->GetOriginalWeight(); });
1530
1531 testingSumWeightsPerClass.at(cls) =
1532 std::accumulate(tmpEventVector[Types::kTesting].at(cls).begin(),
1533 tmpEventVector[Types::kTesting].at(cls).end(),
1534 Double_t(0), [](Double_t w, const TMVA::Event *E) { return w + E->GetOriginalWeight(); });
1535
1536 if ( cls == dsi.GetSignalClassIndex()){
1537 trainingSumSignalWeights += trainingSumWeightsPerClass.at(cls);
1538 testingSumSignalWeights += testingSumWeightsPerClass.at(cls);
1539 }else{
1540 trainingSumBackgrWeights += trainingSumWeightsPerClass.at(cls);
1541 testingSumBackgrWeights += testingSumWeightsPerClass.at(cls);
1542 }
1543 }
1544
1545 // ---------------------------------
1546 // compute renormalization factors
1547
1548 ValuePerClass renormFactor( dsi.GetNClasses() );
1549
1550
1551 // for information purposes
1552 dsi.SetNormalization( normMode );
1553 // !! these will be overwritten later by the 'rescaled' ones if
1554 // NormMode != None !!!
1555 dsi.SetTrainingSumSignalWeights(trainingSumSignalWeights);
1556 dsi.SetTrainingSumBackgrWeights(trainingSumBackgrWeights);
1557 dsi.SetTestingSumSignalWeights(testingSumSignalWeights);
1558 dsi.SetTestingSumBackgrWeights(testingSumBackgrWeights);
1559
1560
1561 if (normMode == "NONE") {
1562 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "No weight renormalisation applied: use original global and event weights" << Endl;
1563 return;
1564 }
1565 //changed by Helge 27.5.2013: the idea behind this renormalisation is to have the SAME
1566 //amount of effective TRAINING data for signal and background.
1567 // Testing events are irrelevant for this and might actually skew the whole normalisation!
1568 else if (normMode == "NUMEVENTS") {
1569 Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
1570 << "\tWeight renormalisation mode: \"NumEvents\": renormalises all event classes " << Endl;
1571 Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
1572 << " such that the effective (weighted) number of events in each class equals the respective " << Endl;
1573 Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
1574 << " number of events (entries) that you demanded in PrepareTrainingAndTestTree(\"\",\"nTrain_Signal=.. )" << Endl;
1575 Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
1576 << " ... i.e. such that Sum[i=1..N_j]{w_i} = N_j, j=0,1,2..." << Endl;
1577 Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
1578 << " ... (note that N_j is the sum of TRAINING events (nTrain_j...with j=Signal,Background.." << Endl;
1579 Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
1580 << " ..... Testing events are not renormalised nor included in the renormalisation factor! )"<< Endl;
1581
1582 for( UInt_t cls = 0, clsEnd = dsi.GetNClasses(); cls < clsEnd; ++cls ){
1583 // renormFactor.at(cls) = ( (trainingSizePerClass.at(cls) + testingSizePerClass.at(cls))/
1584 // (trainingSumWeightsPerClass.at(cls) + testingSumWeightsPerClass.at(cls)) );
1585 //changed by Helge 27.5.2013
1586 renormFactor.at(cls) = ((Float_t)trainingSizePerClass.at(cls) )/
1587 (trainingSumWeightsPerClass.at(cls)) ;
1588 }
1589 }
1590 else if (normMode == "EQUALNUMEVENTS") {
1591 //changed by Helge 27.5.2013: the idea is to have the SAME amount of effective TRAINING
1592 //data for signal and background.
1593 //Previously each data source was normalised to its own number of entries, and this even
1594 //for training+testing together, which served no clear purpose.
1595
1596 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "Weight renormalisation mode: \"EqualNumEvents\": renormalises all event classes ..." << Endl;
1597 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << " such that the effective (weighted) number of events in each class is the same " << Endl;
1598 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << " (and equals the number of events (entries) given for class=0 )" << Endl;
1599 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "... i.e. such that Sum[i=1..N_j]{w_i} = N_classA, j=classA, classB, ..." << Endl;
1600 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << "... (note that N_j is the sum of TRAINING events" << Endl;
1601 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << " ..... Testing events are not renormalised nor included in the renormalisation factor!)" << Endl;
1602
1603 // normalize to size of first class
1604 UInt_t referenceClass = 0;
1605 for (UInt_t cls = 0, clsEnd = dsi.GetNClasses(); cls < clsEnd; ++cls ) {
1606 renormFactor.at(cls) = Float_t(trainingSizePerClass.at(referenceClass))/
1607 (trainingSumWeightsPerClass.at(cls));
1608 }
1609 }
1610 else {
1611 Log() << kFATAL << Form("Dataset[%s] : ",dsi.GetName())<< "<PrepareForTrainingAndTesting> Unknown NormMode: " << normMode << Endl;
1612 }
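// Summary of the factors computed above (per class j, TRAINING events only):
//   NumEvents:      renormFactor_j = N_j / Sum_i w_i(j)
//   EqualNumEvents: renormFactor_j = N_0 / Sum_i w_i(j)   (class 0 is the reference)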
1613
1614 // ---------------------------------
1615 // now apply the normalization factors
1616 Int_t maxL = dsi.GetClassNameMaxLength();
1617 for (UInt_t cls = 0, clsEnd = dsi.GetNClasses(); cls<clsEnd; ++cls) {
1618 Log() << kDEBUG //<< Form("Dataset[%s] : ",dsi.GetName())
1619 << "--> Rescale " << setiosflags(ios::left) << std::setw(maxL)
1620 << dsi.GetClassInfo(cls)->GetName() << " event weights by factor: " << renormFactor.at(cls) << Endl;
1621 for (EventVector::iterator it = tmpEventVector[Types::kTraining].at(cls).begin(),
1622 itEnd = tmpEventVector[Types::kTraining].at(cls).end(); it != itEnd; ++it){
1623 (*it)->SetWeight ((*it)->GetWeight() * renormFactor.at(cls));
1624 }
1625
1626 }
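// note: only the TRAINING event weights are rescaled above; testing weights remain as supplied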
1627
1628
1629 // print out the result
1630 // (same code as above; this could be factored out into a helper)
1631 //
1632
1633 Log() << kINFO //<< Form("Dataset[%s] : ",dsi.GetName())
1634 << "Number of training and testing events" << Endl;
1635 Log() << kDEBUG << "\tafter rescaling:" << Endl;
1636 Log() << kINFO //<< Form("Dataset[%s] : ",dsi.GetName())
1637 << "---------------------------------------------------------------------------" << Endl;
1638
1639 trainingSumSignalWeights = 0;
1640 trainingSumBackgrWeights = 0; // Backgr. includes all classes that are not signal
1641 testingSumSignalWeights = 0;
1642 testingSumBackgrWeights = 0; // Backgr. includes all classes that are not signal
1643
1644 for( UInt_t cls = 0, clsEnd = dsi.GetNClasses(); cls < clsEnd; ++cls ){
1645 trainingSumWeightsPerClass.at(cls) =
1646 std::accumulate(tmpEventVector[Types::kTraining].at(cls).begin(),
1647 tmpEventVector[Types::kTraining].at(cls).end(),
1648 Double_t(0), [](Double_t w, const TMVA::Event *E) { return w + E->GetOriginalWeight(); });
1649
1650 testingSumWeightsPerClass.at(cls) =
1651 std::accumulate(tmpEventVector[Types::kTesting].at(cls).begin(),
1652 tmpEventVector[Types::kTesting].at(cls).end(),
1653 Double_t(0), [](Double_t w, const TMVA::Event *E) { return w + E->GetOriginalWeight(); });
1654
1655 if ( cls == dsi.GetSignalClassIndex()){
1656 trainingSumSignalWeights += trainingSumWeightsPerClass.at(cls);
1657 testingSumSignalWeights += testingSumWeightsPerClass.at(cls);
1658 }else{
1659 trainingSumBackgrWeights += trainingSumWeightsPerClass.at(cls);
1660 testingSumBackgrWeights += testingSumWeightsPerClass.at(cls);
1661 }
1662
1663 // output statistics
1664
1665 Log() << kINFO //<< Form("Dataset[%s] : ",dsi.GetName())
1666 << setiosflags(ios::left) << std::setw(maxL)
1667 << dsi.GetClassInfo(cls)->GetName() << " -- "
1668 << "training events : " << trainingSizePerClass.at(cls) << Endl;
1669 Log() << kDEBUG << "\t(sum of weights: " << trainingSumWeightsPerClass.at(cls) << ")"
1670 << " - requested were " << eventCounts[cls].nTrainingEventsRequested << " events" << Endl;
1671 Log() << kINFO //<< Form("Dataset[%s] : ",dsi.GetName())
1672 << setiosflags(ios::left) << std::setw(maxL)
1673 << dsi.GetClassInfo(cls)->GetName() << " -- "
1674 << "testing events : " << testingSizePerClass.at(cls) << Endl;
1675 Log() << kDEBUG << "\t(sum of weights: " << testingSumWeightsPerClass.at(cls) << ")"
1676 << " - requested were " << eventCounts[cls].nTestingEventsRequested << " events" << Endl;
1677 Log() << kINFO //<< Form("Dataset[%s] : ",dsi.GetName())
1678 << setiosflags(ios::left) << std::setw(maxL)
1679 << dsi.GetClassInfo(cls)->GetName() << " -- "
1680 << "training and testing events: "
1681 << (trainingSizePerClass.at(cls)+testingSizePerClass.at(cls)) << Endl;
1682 Log() << kDEBUG << "\t(sum of weights: "
1683 << (trainingSumWeightsPerClass.at(cls)+testingSumWeightsPerClass.at(cls)) << ")" << Endl;
1684 if(eventCounts[cls].nEvAfterCut<eventCounts[cls].nEvBeforeCut) {
1685 Log() << kINFO << Form("Dataset[%s] : ",dsi.GetName()) << setiosflags(ios::left) << std::setw(maxL)
1686 << dsi.GetClassInfo(cls)->GetName() << " -- "
1687 << "due to the preselection a scaling factor has been applied to the numbers of requested events: "
1688 << eventCounts[cls].cutScaling() << Endl;
1689 }
1690 }
1691 Log() << kINFO << Endl;
1692
1693 // for information purposes
1694 dsi.SetTrainingSumSignalWeights(trainingSumSignalWeights);
1695 dsi.SetTrainingSumBackgrWeights(trainingSumBackgrWeights);
1696 dsi.SetTestingSumSignalWeights(testingSumSignalWeights);
1697 dsi.SetTestingSumBackgrWeights(testingSumBackgrWeights);
1698
1699
1700}