Logo ROOT  
Reference Guide
DataLoader.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Omar Zapata
3 // Mentors: Lorenzo Moneta, Sergei Gleyzer
4 //NOTE: Based on TMVA::Factory
5 
6 /**********************************************************************************
7  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
8  * Package: TMVA *
9  * Class : DataLoader *
10  * Web : http://tmva.sourceforge.net *
11  * *
12  * Description: *
13  * This is a class to load datasets into every booked method *
14  * *
15  * Authors (alphabetical): *
16  * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
17  * Omar Zapata <Omar.Zapata@cern.ch> - ITM/UdeA, Colombia *
18  * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
19  * *
20  * Copyright (c) 2005-2015: *
21  * CERN, Switzerland *
22  * ITM/UdeA, Colombia *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 
30 /*! \class TMVA::DataLoader
31 \ingroup TMVA
32 
33 */
34 
35 #include "TTree.h"
36 #include "TH2.h"
37 #include "TMatrixD.h"
38 
39 #include "TMVA/DataLoader.h"
40 #include "TMVA/Config.h"
41 #include "TMVA/CvSplit.h"
42 #include "TMVA/Tools.h"
43 #include "TMVA/IMethod.h"
44 #include "TMVA/MethodBase.h"
45 #include "TMVA/DataInputHandler.h"
46 #include "TMVA/DataSetManager.h"
47 #include "TMVA/DataSetInfo.h"
48 #include "TMVA/MethodBoost.h"
49 #include "TMVA/MethodCategory.h"
50 
51 #include "TMVA/VariableInfo.h"
58 
60 
61 ////////////////////////////////////////////////////////////////////////////////
62 /*** Create a data loader
63  \param[in] thedlName name of DataLoader object. This name will be used as the
64  top directory name where the training results
65  (weights, i.e .XML and .C files) will be stored.
66  The results will be stored by default in the `theDlName/weights`
67  directory and relative to the current directory. If the directory is not existing,
68  a new one will be created automatically.
69  For using a different location (i.e. a different path to the current directory) one
70  can set an absolute path location in `TMVA::gConfig()::GetIONames().fWeightFileDirPrefix`
71  For example, by setting
72 ~~~~~~~~~~~~~~~{.cpp}
73  TMVA::gConfig()::GetIONames().fWeightFileDirPrefix = "/tmp";
74  TMVA::gConfig()::GetIONames().fWeightFileDir = "myTrainigResults";
75 ~~~~~~~~~~~~~~~
76  The training results will be stored in the `/tmp/thedlName/myTrainingResults`
77  directory.
78 **/
79 
81 : Configurable( ),
82  fDataSetManager ( NULL ), //DSMTEST
83  fDataInputHandler ( new DataInputHandler ),
84  fTransformations ( "I" ),
85  fVerbose ( kFALSE ),
86  fDataAssignType ( kAssignEvents ),
87  fATreeEvent (0)
88 {
90  SetName(thedlName.Data());
91  fLogger->SetSource("DataLoader");
92 }
93 
94 ////////////////////////////////////////////////////////////////////////////////
95 
97 {
98  // destructor
99 
100  std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
101  for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);
102 
103  delete fDataInputHandler;
104 
105  // destroy singletons
106  // DataSetManager::DestroyInstance(); // DSMTEST replaced by following line
107  delete fDataSetManager; // DSMTEST
108 
109  // problem with call of REGISTER_METHOD macro ...
110  // ClassifierDataLoader::DestroyInstance();
111  // Types::DestroyInstance();
112  //Tools::DestroyInstance();
113  //Config::DestroyInstance();
114 }
115 
116 
117 ////////////////////////////////////////////////////////////////////////////////
118 
120 {
121  return fDataSetManager->AddDataSetInfo(dsi); // DSMTEST
122 }
123 
124 ////////////////////////////////////////////////////////////////////////////////
125 
127 {
128  DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName); // DSMTEST
129 
130  if (dsi!=0) return *dsi;
131 
132  return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName))); // DSMTEST
133 }
134 
135 ////////////////////////////////////////////////////////////////////////////////
136 
138 {
139  return DefaultDataSetInfo(); // DSMTEST
140 }
141 
142 ////////////////////////////////////////////////////////////////////////////////
143 /// Transforms the variables and return a new DataLoader with the transformed
144 /// variables
145 
147 {
148  TString trOptions = "0";
149  TString trName = "None";
150  if (trafoDefinition.Contains("(")) {
151 
152  // contains transformation parameters
153  Ssiz_t parStart = trafoDefinition.Index( "(" );
154  Ssiz_t parLen = trafoDefinition.Index( ")", parStart )-parStart+1;
155 
156  trName = trafoDefinition(0,parStart);
157  trOptions = trafoDefinition(parStart,parLen);
158  trOptions.Remove(parLen-1,1);
159  trOptions.Remove(0,1);
160  }
161  else
162  trName = trafoDefinition;
163 
164  VarTransformHandler* handler = new VarTransformHandler(this);
165  // variance threshold variable transformation
166  if (trName == "VT") {
167 
168  // find threshold value from given input
169  Double_t threshold = 0.0;
170  if (!trOptions.IsFloat()){
171  Log() << kFATAL << " VT transformation must be passed a floating threshold value" << Endl;
172  delete handler;
173  return this;
174  }
175  else
176  threshold = trOptions.Atof();
177  TMVA::DataLoader *transformedLoader = handler->VarianceThreshold(threshold);
178  delete handler;
179  return transformedLoader;
180  }
181  else {
182  Log() << kFATAL << "Incorrect transformation string provided, please check" << Endl;
183  }
184  Log() << kINFO << "No transformation applied, returning original loader" << Endl;
185  return this;
186 }
187 
188 ////////////////////////////////////////////////////////////////////////////////
189 // the next functions are to assign events directly
190 
191 ////////////////////////////////////////////////////////////////////////////////
192 /// create the data assignment tree (for event-wise data assignment by user)
193 
195 {
196  TTree * assignTree = new TTree( name, name );
197  assignTree->SetDirectory(0);
198  assignTree->Branch( "type", &fATreeType, "ATreeType/I" );
199  assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );
200 
201  std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
202  std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
203  std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
204 
205  if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
206  // add variables
207  for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
208  TString vname = vars[ivar].GetExpression();
209  assignTree->Branch( vname, &fATreeEvent[ivar], vname + "/F" );
210  }
211  // add targets
212  for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
213  TString vname = tgts[itgt].GetExpression();
214  assignTree->Branch( vname, &fATreeEvent[vars.size()+itgt], vname + "/F" );
215  }
216  // add spectators
217  for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
218  TString vname = spec[ispc].GetExpression();
219  assignTree->Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname + "/F" );
220  }
221  return assignTree;
222 }
223 
224 ////////////////////////////////////////////////////////////////////////////////
225 /// add signal training event
226 
227 void TMVA::DataLoader::AddSignalTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
228 {
229  AddEvent( "Signal", Types::kTraining, event, weight );
230 }
231 
232 ////////////////////////////////////////////////////////////////////////////////
233 /// add signal testing event
234 
235 void TMVA::DataLoader::AddSignalTestEvent( const std::vector<Double_t>& event, Double_t weight )
236 {
237  AddEvent( "Signal", Types::kTesting, event, weight );
238 }
239 
240 ////////////////////////////////////////////////////////////////////////////////
241 /// add signal training event
242 
243 void TMVA::DataLoader::AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
244 {
245  AddEvent( "Background", Types::kTraining, event, weight );
246 }
247 
248 ////////////////////////////////////////////////////////////////////////////////
249 /// add signal training event
250 
251 void TMVA::DataLoader::AddBackgroundTestEvent( const std::vector<Double_t>& event, Double_t weight )
252 {
253  AddEvent( "Background", Types::kTesting, event, weight );
254 }
255 
256 ////////////////////////////////////////////////////////////////////////////////
257 /// add signal training event
258 
259 void TMVA::DataLoader::AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
260 {
261  AddEvent( className, Types::kTraining, event, weight );
262 }
263 
264 ////////////////////////////////////////////////////////////////////////////////
265 /// add signal test event
266 
267 void TMVA::DataLoader::AddTestEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
268 {
269  AddEvent( className, Types::kTesting, event, weight );
270 }
271 
272 ////////////////////////////////////////////////////////////////////////////////
273 /// add event
274 /// vector event : the order of values is: variables + targets + spectators
275 
277  const std::vector<Double_t>& event, Double_t weight )
278 {
279  ClassInfo* theClass = DefaultDataSetInfo().AddClass(className); // returns class (creates it if necessary)
280  UInt_t clIndex = theClass->GetNumber();
281 
282 
283  // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
284  if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
285  fAnalysisType = Types::kMulticlass;
286 
287 
288  if (clIndex>=fTrainAssignTree.size()) {
289  fTrainAssignTree.resize(clIndex+1, 0);
290  fTestAssignTree.resize(clIndex+1, 0);
291  }
292 
293  if (fTrainAssignTree[clIndex]==0) { // does not exist yet
294  fTrainAssignTree[clIndex] = CreateEventAssignTrees( Form("TrainAssignTree_%s", className.Data()) );
295  fTestAssignTree[clIndex] = CreateEventAssignTrees( Form("TestAssignTree_%s", className.Data()) );
296  }
297 
298  fATreeType = clIndex;
299  fATreeWeight = weight;
300  for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
301 
302  if(tt==Types::kTraining) fTrainAssignTree[clIndex]->Fill();
303  else fTestAssignTree[clIndex]->Fill();
304 
305 }
306 
307 ////////////////////////////////////////////////////////////////////////////////
308 ///
309 
311 {
312  return fTrainAssignTree[clIndex]!=0;
313 }
314 
315 ////////////////////////////////////////////////////////////////////////////////
316 /// assign event-wise local trees to data set
317 
319 {
320  UInt_t size = fTrainAssignTree.size();
321  for(UInt_t i=0; i<size; i++) {
322  if(!UserAssignEvents(i)) continue;
323  const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
324  SetWeightExpression( "weight", className );
325  AddTree(fTrainAssignTree[i], className, 1.0, TCut(""), Types::kTraining );
326  AddTree(fTestAssignTree[i], className, 1.0, TCut(""), Types::kTesting );
327  }
328 }
329 
330 ////////////////////////////////////////////////////////////////////////////////
331 /// number of signal events (used to compute significance)
332 
333 void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
334  const TCut& cut, const TString& treetype )
335 {
337  TString tmpTreeType = treetype; tmpTreeType.ToLower();
338  if (tmpTreeType.Contains( "train" ) && tmpTreeType.Contains( "test" )) tt = Types::kMaxTreeType;
339  else if (tmpTreeType.Contains( "train" )) tt = Types::kTraining;
340  else if (tmpTreeType.Contains( "test" )) tt = Types::kTesting;
341  else {
342  Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
343  << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;
344  }
345  AddTree( tree, className, weight, cut, tt );
346 }
347 
348 ////////////////////////////////////////////////////////////////////////////////
349 
350 void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
351  const TCut& cut, Types::ETreeType tt )
352 {
353  if(!tree)
354  Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;
355 
356  DefaultDataSetInfo().AddClass( className );
357 
358  // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
359  if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
360  fAnalysisType = Types::kMulticlass;
361 
362  Log() << kINFO<< "Add Tree " << tree->GetName() << " of type " << className
363  << " with " << tree->GetEntries() << " events" << Endl;
364  DataInput().AddTree( tree, className, weight, cut, tt );
365 }
366 
367 ////////////////////////////////////////////////////////////////////////////////
368 /// number of signal events (used to compute significance)
369 
371 {
372  AddTree( signal, "Signal", weight, TCut(""), treetype );
373 }
374 
375 ////////////////////////////////////////////////////////////////////////////////
376 /// add signal tree from text file
377 
379 {
380  // create trees from these ascii files
381  TTree* signalTree = new TTree( "TreeS", "Tree (S)" );
382  signalTree->ReadFile( datFileS );
383 
384  Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file : \""
385  << datFileS << Endl;
386 
387  // number of signal events (used to compute significance)
388  AddTree( signalTree, "Signal", weight, TCut(""), treetype );
389 }
390 
391 ////////////////////////////////////////////////////////////////////////////////
392 
393 void TMVA::DataLoader::AddSignalTree( TTree* signal, Double_t weight, const TString& treetype )
394 {
395  AddTree( signal, "Signal", weight, TCut(""), treetype );
396 }
397 
398 ////////////////////////////////////////////////////////////////////////////////
399 /// number of signal events (used to compute significance)
400 
402 {
403  AddTree( signal, "Background", weight, TCut(""), treetype );
404 }
405 
406 ////////////////////////////////////////////////////////////////////////////////
407 /// add background tree from text file
408 
410 {
411  // create trees from these ascii files
412  TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );
413  bkgTree->ReadFile( datFileB );
414 
415  Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file : \""
416  << datFileB << Endl;
417 
418  // number of signal events (used to compute significance)
419  AddTree( bkgTree, "Background", weight, TCut(""), treetype );
420 }
421 
422 ////////////////////////////////////////////////////////////////////////////////
423 
424 void TMVA::DataLoader::AddBackgroundTree( TTree* signal, Double_t weight, const TString& treetype )
425 {
426  AddTree( signal, "Background", weight, TCut(""), treetype );
427 }
428 
429 ////////////////////////////////////////////////////////////////////////////////
430 
432 {
433  AddTree( tree, "Signal", weight );
434 }
435 
436 ////////////////////////////////////////////////////////////////////////////////
437 
439 {
440  AddTree( tree, "Background", weight );
441 }
442 
443 ////////////////////////////////////////////////////////////////////////////////
444 /// set background tree
445 
446 void TMVA::DataLoader::SetTree( TTree* tree, const TString& className, Double_t weight )
447 {
448  AddTree( tree, className, weight, TCut(""), Types::kMaxTreeType );
449 }
450 
451 ////////////////////////////////////////////////////////////////////////////////
452 /// define the input trees for signal and background; no cuts are applied
453 
454 void TMVA::DataLoader::SetInputTrees( TTree* signal, TTree* background,
455  Double_t signalWeight, Double_t backgroundWeight )
456 {
457  AddTree( signal, "Signal", signalWeight, TCut(""), Types::kMaxTreeType );
458  AddTree( background, "Background", backgroundWeight, TCut(""), Types::kMaxTreeType );
459 }
460 
461 ////////////////////////////////////////////////////////////////////////////////
462 
463 void TMVA::DataLoader::SetInputTrees( const TString& datFileS, const TString& datFileB,
464  Double_t signalWeight, Double_t backgroundWeight )
465 {
466  DataInput().AddTree( datFileS, "Signal", signalWeight );
467  DataInput().AddTree( datFileB, "Background", backgroundWeight );
468 }
469 
470 ////////////////////////////////////////////////////////////////////////////////
471 /// define the input trees for signal and background from single input tree,
472 /// containing both signal and background events distinguished by the type
473 /// identifiers: SigCut and BgCut
474 
475 void TMVA::DataLoader::SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut )
476 {
477  AddTree( inputTree, "Signal", 1.0, SigCut, Types::kMaxTreeType );
478  AddTree( inputTree, "Background", 1.0, BgCut , Types::kMaxTreeType );
479 }
480 
481 ////////////////////////////////////////////////////////////////////////////////
482 /// user inserts discriminating variable in data set info
483 
484 void TMVA::DataLoader::AddVariable( const TString& expression, const TString& title, const TString& unit,
485  char type, Double_t min, Double_t max )
486 {
487  DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
488 }
489 
490 ////////////////////////////////////////////////////////////////////////////////
491 /// user inserts discriminating variable in data set info
492 
493 void TMVA::DataLoader::AddVariable( const TString& expression, char type,
494  Double_t min, Double_t max )
495 {
496  DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );
497 }
498 
499 ////////////////////////////////////////////////////////////////////////////////
500 /// user inserts discriminating array of variables in data set info
501 /// in case input tree provides an array of values
502 
503 void TMVA::DataLoader::AddVariablesArray(const TString &expression, int size, char type,
504  Double_t min, Double_t max)
505 {
506  DefaultDataSetInfo().AddVariablesArray(expression, size, "", "", min, max, type);
507 }
508 ////////////////////////////////////////////////////////////////////////////////
509 /// user inserts target in data set info
510 
511 void TMVA::DataLoader::AddTarget( const TString& expression, const TString& title, const TString& unit,
512  Double_t min, Double_t max )
513 {
514  if( fAnalysisType == Types::kNoAnalysisType )
515  fAnalysisType = Types::kRegression;
516 
517  DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
518 }
519 
520 ////////////////////////////////////////////////////////////////////////////////
521 /// user inserts target in data set info
522 
523 void TMVA::DataLoader::AddSpectator( const TString& expression, const TString& title, const TString& unit,
524  Double_t min, Double_t max )
525 {
526  DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
527 }
528 
529 ////////////////////////////////////////////////////////////////////////////////
530 /// default creation
531 
533 {
534  return AddDataSet( fName );
535 }
536 
537 ////////////////////////////////////////////////////////////////////////////////
538 /// fill input variables in data set
539 
540 void TMVA::DataLoader::SetInputVariables( std::vector<TString>* theVariables )
541 {
542  for (std::vector<TString>::iterator it=theVariables->begin();
543  it!=theVariables->end(); ++it) AddVariable(*it);
544 }
545 
546 ////////////////////////////////////////////////////////////////////////////////
547 
549 {
550  DefaultDataSetInfo().SetWeightExpression(variable, "Signal");
551 }
552 
553 ////////////////////////////////////////////////////////////////////////////////
554 
556 {
557  DefaultDataSetInfo().SetWeightExpression(variable, "Background");
558 }
559 
560 ////////////////////////////////////////////////////////////////////////////////
561 
562 void TMVA::DataLoader::SetWeightExpression( const TString& variable, const TString& className )
563 {
564  //Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;
565  if (className=="") {
566  SetSignalWeightExpression(variable);
567  SetBackgroundWeightExpression(variable);
568  }
569  else DefaultDataSetInfo().SetWeightExpression( variable, className );
570 }
571 
572 ////////////////////////////////////////////////////////////////////////////////
573 
574 void TMVA::DataLoader::SetCut( const TString& cut, const TString& className ) {
575  SetCut( TCut(cut), className );
576 }
577 
578 ////////////////////////////////////////////////////////////////////////////////
579 
580 void TMVA::DataLoader::SetCut( const TCut& cut, const TString& className )
581 {
582  DefaultDataSetInfo().SetCut( cut, className );
583 }
584 
585 ////////////////////////////////////////////////////////////////////////////////
586 
587 void TMVA::DataLoader::AddCut( const TString& cut, const TString& className )
588 {
589  AddCut( TCut(cut), className );
590 }
591 
592 ////////////////////////////////////////////////////////////////////////////////
593 void TMVA::DataLoader::AddCut( const TCut& cut, const TString& className )
594 {
595  DefaultDataSetInfo().AddCut( cut, className );
596 }
597 
598 ////////////////////////////////////////////////////////////////////////////////
599 /// prepare the training and test trees
600 
602  Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
603  const TString& otherOpt )
604 {
605  SetInputTreesFromEventAssignTrees();
606 
607  AddCut( cut );
608 
609  DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
610  NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()) );
611 }
612 
613 ////////////////////////////////////////////////////////////////////////////////
614 /// prepare the training and test trees
615 /// kept for backward compatibility
616 
618 {
619  SetInputTreesFromEventAssignTrees();
620 
621  AddCut( cut );
622 
623  DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
624  Ntrain, Ntrain, Ntest, Ntest) );
625 }
626 
627 ////////////////////////////////////////////////////////////////////////////////
628 /// prepare the training and test trees
629 /// -> same cuts for signal and background
630 
632 {
633  SetInputTreesFromEventAssignTrees();
634 
635  DefaultDataSetInfo().PrintClasses();
636  AddCut( cut );
637  DefaultDataSetInfo().SetSplitOptions( opt );
638 }
639 
640 ////////////////////////////////////////////////////////////////////////////////
641 /// prepare the training and test trees
642 
643 void TMVA::DataLoader::PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt )
644 {
645  // if event-wise data assignment, add local trees to dataset first
646  SetInputTreesFromEventAssignTrees();
647 
648  //Log() << kINFO <<"Preparing trees for training and testing..."<< Endl;
649  AddCut( sigcut, "Signal" );
650  AddCut( bkgcut, "Background" );
651 
652  DefaultDataSetInfo().SetSplitOptions( splitOpt );
653 }
654 
655 ////////////////////////////////////////////////////////////////////////////////
656 /// Function required to split the training and testing datasets into a
657 /// number of folds. Required by the CrossValidation and HyperParameterOptimisation
658 /// classes. The option to split the training dataset into a training set and
659 /// a validation set is implemented but not currently used.
660 
662 {
663  s.MakeKFoldDataSet( DefaultDataSetInfo() );
664 }
665 
666 ////////////////////////////////////////////////////////////////////////////////
667 /// Function for assigning the correct folds to the testing or training set.
668 
670 {
671  s.PrepareFoldDataSet( DefaultDataSetInfo(), foldNumber, tt );
672 }
673 
674 
675 ////////////////////////////////////////////////////////////////////////////////
676 /// Recombines the dataset. The precise semantics depend on the actual split.
677 ///
678 /// Similar to the inverse operation of `MakeKFoldDataSet` but _will_ differ.
679 /// See documentation for each particular split for more information.
680 ///
681 
683 {
684  s.RecombineKFoldDataSet( DefaultDataSetInfo(), tt );
685 }
686 
687 ////////////////////////////////////////////////////////////////////////////////
688 /// Copy method use in VI and CV
689 
691 {
693  DataLoaderCopy(des,this);
694  return des;
695 }
696 
697 ////////////////////////////////////////////////////////////////////////////////
698 ///Loading Dataset from DataInputHandler for subseed
699 
701 {
702  for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();++treeinfo)
703  {
704  des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
705  }
706 
707  for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();++treeinfo)
708  {
709  des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
710  }
711 }
712 
713 ////////////////////////////////////////////////////////////////////////////////
714 /// returns the correlation matrix of datasets
715 
717 {
718  const TMatrixD * m = DefaultDataSetInfo().CorrelationMatrix(className);
719  return DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
720  "CorrelationMatrix"+className, "Correlation Matrix ("+className+")");
721 }
TMVA::DataLoader::SetBackgroundTree
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
Definition: DataLoader.cxx:438
m
auto * m
Definition: textangle.C:8
TMVA::DataLoader::AddVariablesArray
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
Definition: DataLoader.cxx:503
TMVA::DataLoader::fDataInputHandler
DataInputHandler * fDataInputHandler
Definition: DataLoader.h:189
TMVA::DataLoader::AddBackgroundTrainingEvent
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:243
TCut
A specialized string object used for TTree selections.
Definition: TCut.h:25
TMVA::DataLoader::AddEvent
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators
Definition: DataLoader.cxx:276
VariablePCATransform.h
TMVA::DataLoader::DataLoader
DataLoader(TString thedlName="default")
Definition: DataLoader.cxx:80
TMVA::Types::kMulticlass
@ kMulticlass
Definition: Types.h:131
TMVA::DataLoader::SetCut
void SetCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:574
DataSetManager.h
tt
auto * tt
Definition: textangle.C:16
TMVA::DataInputHandler::Sbegin
std::vector< TreeInfo >::const_iterator Sbegin() const
Definition: DataInputHandler.h:113
TMVA::Configurable
Definition: Configurable.h:45
TMVA::DataLoader::PrepareTrainingAndTestTree
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:631
TNamed::SetName
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
TMVA::Types::kRegression
@ kRegression
Definition: Types.h:130
TMVA::DataLoader::AddSignalTestEvent
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Definition: DataLoader.cxx:235
TMVA::ClassInfo
Class that contains all the information of a class.
Definition: ClassInfo.h:49
TMVA::DataInputHandler
Class that contains all the data information.
Definition: DataInputHandler.h:78
TMVA::DataLoader::MakeCopy
DataLoader * MakeCopy(TString name)
Copy method use in VI and CV.
Definition: DataLoader.cxx:690
TString::Data
const char * Data() const
Definition: TString.h:369
tree
Definition: tree.py:1
DataSetInfo.h
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
Form
char * Form(const char *fmt,...)
TMVA::DataLoaderCopy
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
TMVA::DataLoader::SetSignalWeightExpression
void SetSignalWeightExpression(const TString &variable)
Definition: DataLoader.cxx:548
IMethod.h
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:760
TMVA::DataLoader::AddTree
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
Definition: DataLoader.cxx:350
TMVA::Types::kTesting
@ kTesting
Definition: Types.h:146
TString::Atof
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:2007
TTree
A TTree represents a columnar dataset.
Definition: TTree.h:79
DataLoader.h
VariableDecorrTransform.h
VariableInfo.h
TGeant4Unit::s
static constexpr double s
Definition: TGeant4SystemOfUnits.h:162
VariableIdentityTransform.h
TString::Contains
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:624
TMVA::DataInputHandler::Bend
std::vector< TreeInfo >::const_iterator Bend() const
Definition: DataInputHandler.h:116
MethodBase.h
TMVA::DataLoader::SetTree
void SetTree(TTree *tree, const TString &className, Double_t weight)
set background tree
Definition: DataLoader.cxx:446
TMVA::DataLoader::SetWeightExpression
void SetWeightExpression(const TString &variable, const TString &className="")
Definition: DataLoader.cxx:562
TString::IsFloat
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1811
VarTransformHandler.h
CvSplit.h
TTree.h
TMVA::DataLoader::AddSignalTree
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:370
TString
Basic string class.
Definition: TString.h:136
TMatrixT< Double_t >
bool
TMVA::DataLoader::AddSpectator
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:523
TMVA::VarTransformHandler
Definition: VarTransformHandler.h:41
TMVA::DataLoader::AddTarget
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:511
TMVA::DataLoader::SetInputTreesFromEventAssignTrees
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
Definition: DataLoader.cxx:318
TMVA::DataLoader::UserAssignEvents
Bool_t UserAssignEvents(UInt_t clIndex)
Definition: DataLoader.cxx:310
TMVA::DataSetInfo
Class that contains all the data information.
Definition: DataSetInfo.h:62
TMVA::DataLoader::SetBackgroundWeightExpression
void SetBackgroundWeightExpression(const TString &variable)
Definition: DataLoader.cxx:555
DataInputHandler.h
MethodBoost.h
TTree::Branch
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition: TTree.h:349
TMVA::Types::ETreeType
ETreeType
Definition: Types.h:144
TString::Remove
TString & Remove(Ssiz_t pos)
Definition: TString.h:673
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:92
TMVA::DataLoader::AddBackgroundTestEvent
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:251
TH2
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
VariableGaussTransform.h
TMVA::CvSplit
Definition: CvSplit.h:37
TMVA::DataLoader::SetInputTrees
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
Definition: DataLoader.cxx:463
TMVA::DataLoader::GetCorrelationMatrix
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Definition: DataLoader.cxx:716
VariableNormalizeTransform.h
TMVA::ClassInfo::GetNumber
UInt_t GetNumber() const
Definition: ClassInfo.h:65
TTree::SetDirectory
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
Definition: TTree.cxx:8805
TH2.h
TMVA::DataLoader::PrepareFoldDataSet
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Definition: DataLoader.cxx:669
TMVA::Types::kMaxTreeType
@ kMaxTreeType
Definition: Types.h:147
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Config.h
unsigned int
TMVA::DataLoader::AddCut
void AddCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:587
TMVA::DataLoader::CreateEventAssignTrees
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
Definition: DataLoader.cxx:194
TTree::ReadFile
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
Definition: TTree.cxx:7459
TMVA::Types::kTraining
@ kTraining
Definition: Types.h:145
TString::Index
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:639
TMVA::DataSetManager
Class that contains all the data information.
Definition: DataSetManager.h:51
TMVA::DataLoader::SetSignalTree
void SetSignalTree(TTree *signal, Double_t weight=1.0)
Definition: DataLoader.cxx:431
Double_t
double Double_t
Definition: RtypesCore.h:59
TMVA::DataLoader::fDataSetManager
DataSetManager * fDataSetManager
Definition: DataLoader.h:186
TMVA::VarTransformHandler::VarianceThreshold
TMVA::DataLoader * VarianceThreshold(Double_t threshold)
Computes variance of all the variables and returns a new DataLoader with the selected variables whose...
Definition: VarTransformHandler.cxx:84
TMVA::DataLoader::DataInput
DataInputHandler & DataInput()
Definition: DataLoader.h:172
TMVA::Configurable::fLogger
MsgLogger * fLogger
Definition: Configurable.h:128
name
char name[80]
Definition: TGX11.cxx:110
TMVA::DataLoader::RecombineKFoldDataSet
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
Definition: DataLoader.cxx:682
TMVA::MsgLogger::SetSource
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
TMVA::DataInputHandler::Send
std::vector< TreeInfo >::const_iterator Send() const
Definition: DataInputHandler.h:114
TMVA::DataInputHandler::Bbegin
std::vector< TreeInfo >::const_iterator Bbegin() const
Definition: DataInputHandler.h:115
TMVA::DataLoader::AddBackgroundTree
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:401
Tools.h
TMVA::DataLoader::AddDataSet
DataSetInfo & AddDataSet(DataSetInfo &)
Definition: DataLoader.cxx:119
type
int type
Definition: TGX11.cxx:121
TMVA::DataLoader::AddVariable
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:484
launcher.spec
spec
Definition: launcher.py:30
TMatrixD.h
TString::ToLower
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1145
TMVA::DataLoader::MakeKFoldDataSet
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
Definition: DataLoader.cxx:661
TMVA::DataLoader::SetInputVariables
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
Definition: DataLoader.cxx:540
MethodCategory.h
TMVA::DataLoader::GetDataSetInfo
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:137
TMVA::DataLoader::~DataLoader
virtual ~DataLoader()
Definition: DataLoader.cxx:96
TMVA::DataLoader::AddTestEvent
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal test event
Definition: DataLoader.cxx:267
TMVA::DataLoader::VarTransform
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
Definition: DataLoader.cxx:146
TMVA::Types::kNoAnalysisType
@ kNoAnalysisType
Definition: Types.h:132
int
TMVA::DataLoader::AddSignalTrainingEvent
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:227
TMVA::DataLoader
Definition: DataLoader.h:50
TMVA::DataLoader::DefaultDataSetInfo
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:532
TMVA::DataLoader::AddTrainingEvent
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal training event
Definition: DataLoader.cxx:259