Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
DataLoader.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata
3// Mentors: Lorenzo Moneta, Sergei Gleyzer
4//NOTE: Based on TMVA::Factory
5
6/**********************************************************************************
7 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
8 * Package: TMVA *
9 * Class : DataLoader *
10 * *
11 * *
12 * Description: *
13 * This is a class to load datasets into every booked method *
14 * *
15 * Authors (alphabetical): *
16 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
17 * Omar Zapata <Omar.Zapata@cern.ch> - ITM/UdeA, Colombia *
18 * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
19 * *
20 * Copyright (c) 2005-2015: *
21 * CERN, Switzerland *
22 * ITM/UdeA, Colombia *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (see tmva/doc/LICENSE) *
27 **********************************************************************************/
28
29
30/*! \class TMVA::DataLoader
31\ingroup TMVA
32
33*/
34
35#include "TTree.h"
36#include "TH2.h"
37#include "TMatrixD.h"
38
39#include "TMVA/DataLoader.h"
40#include "TMVA/Config.h"
41#include "TMVA/CvSplit.h"
42#include "TMVA/Tools.h"
43#include "TMVA/IMethod.h"
44#include "TMVA/MethodBase.h"
46#include "TMVA/DataSetManager.h"
47#include "TMVA/DataSetInfo.h"
48#include "TMVA/MethodBoost.h"
49#include "TMVA/MethodCategory.h"
50
51#include "TMVA/VariableInfo.h"
58
59
60////////////////////////////////////////////////////////////////////////////////
61/*** Create a data loader
62 \param[in] thedlName name of DataLoader object. This name will be used as the
63 top directory name where the training results
64 (weights, i.e .XML and .C files) will be stored.
65 The results will be stored by default in the `theDlName/weights`
66 directory and relative to the current directory. If the directory is not existing,
67 a new one will be created automatically.
68 For using a different location (i.e. a different path to the current directory) one
69 can set an absolute path location in `TMVA::gConfig()::GetIONames().fWeightFileDirPrefix`
70 For example, by setting
71~~~~~~~~~~~~~~~{.cpp}
72 TMVA::gConfig()::GetIONames().fWeightFileDirPrefix = "/tmp";
73 TMVA::gConfig()::GetIONames().fWeightFileDir = "myTrainingResults";
74~~~~~~~~~~~~~~~
75 The training results will be stored in the `/tmp/thedlName/myTrainingResults`
76 directory.
77**/
78
80: Configurable( ),
81 fDataSetManager ( NULL ), //DSMTEST
82 fDataInputHandler ( new DataInputHandler ),
83 fTransformations ( "I" ),
84 fVerbose ( kFALSE ),
85 fDataAssignType ( kAssignEvents ),
86 fATreeEvent (0)
87{
89 SetName(thedlName.Data());
90 fLogger->SetSource("DataLoader");
91}
92
93////////////////////////////////////////////////////////////////////////////////
94
96{
97 // destructor
98
99 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
100 for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);
101
102 delete fDataInputHandler;
103
104 // destroy singletons
105 // DataSetManager::DestroyInstance(); // DSMTEST replaced by following line
106 delete fDataSetManager; // DSMTEST
107
108 // problem with call of REGISTER_METHOD macro ...
109 // ClassifierDataLoader::DestroyInstance();
110 // Types::DestroyInstance();
111 //Tools::DestroyInstance();
112 //Config::DestroyInstance();
113}
114
115
116////////////////////////////////////////////////////////////////////////////////
117
119{
120 return fDataSetManager->AddDataSetInfo(dsi); // DSMTEST
121}
122
123////////////////////////////////////////////////////////////////////////////////
124
126{
127 DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName); // DSMTEST
128
129 if (dsi!=0) return *dsi;
130
131 return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName))); // DSMTEST
132}
133
134////////////////////////////////////////////////////////////////////////////////
135
137{
138 return DefaultDataSetInfo(); // DSMTEST
139}
140
141////////////////////////////////////////////////////////////////////////////////
142/// Transforms the variables and return a new DataLoader with the transformed
143/// variables
144
146{
147 TString trOptions = "0";
148 TString trName = "None";
149 if (trafoDefinition.Contains("(")) {
150
151 // contains transformation parameters
152 Ssiz_t parStart = trafoDefinition.Index( "(" );
154
157 trOptions.Remove(parLen-1,1);
158 trOptions.Remove(0,1);
159 }
160 else
162
163 VarTransformHandler* handler = new VarTransformHandler(this);
164 // variance threshold variable transformation
165 if (trName == "VT") {
166
167 // find threshold value from given input
168 Double_t threshold = 0.0;
169 if (!trOptions.IsFloat()){
170 Log() << kFATAL << " VT transformation must be passed a floating threshold value" << Endl;
171 delete handler;
172 return this;
173 }
174 else
175 threshold = trOptions.Atof();
177 delete handler;
178 return transformedLoader;
179 }
180 else {
181 delete handler;
182 Log() << kFATAL << "Incorrect transformation string provided, please check" << Endl;
183 }
184 Log() << kINFO << "No transformation applied, returning original loader" << Endl;
185 return this;
186}
187
188////////////////////////////////////////////////////////////////////////////////
189// the next functions are to assign events directly
190
191////////////////////////////////////////////////////////////////////////////////
192/// create the data assignment tree (for event-wise data assignment by user)
193
195{
196 TTree * assignTree = new TTree( name, name );
197 assignTree->SetDirectory(nullptr);
198 assignTree->Branch( "type", &fATreeType, "ATreeType/I" );
199 assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );
200
201 std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
202 std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
203 std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
204
205 if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
206 // add variables
207 for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
208 TString vname = vars[ivar].GetExpression();
209 assignTree->Branch( vname, &fATreeEvent[ivar], vname + "/F" );
210 }
211 // add targets
212 for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
213 TString vname = tgts[itgt].GetExpression();
214 assignTree->Branch( vname, &fATreeEvent[vars.size()+itgt], vname + "/F" );
215 }
216 // add spectators
217 for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
218 TString vname = spec[ispc].GetExpression();
219 assignTree->Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname + "/F" );
220 }
221 return assignTree;
222}
223
224////////////////////////////////////////////////////////////////////////////////
225/// add signal training event
226
227void TMVA::DataLoader::AddSignalTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
228{
229 AddEvent( "Signal", Types::kTraining, event, weight );
230}
231
232////////////////////////////////////////////////////////////////////////////////
233/// add signal testing event
234
235void TMVA::DataLoader::AddSignalTestEvent( const std::vector<Double_t>& event, Double_t weight )
236{
237 AddEvent( "Signal", Types::kTesting, event, weight );
238}
239
240////////////////////////////////////////////////////////////////////////////////
241/// add signal training event
242
243void TMVA::DataLoader::AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
244{
245 AddEvent( "Background", Types::kTraining, event, weight );
246}
247
248////////////////////////////////////////////////////////////////////////////////
249/// add signal training event
250
251void TMVA::DataLoader::AddBackgroundTestEvent( const std::vector<Double_t>& event, Double_t weight )
252{
253 AddEvent( "Background", Types::kTesting, event, weight );
254}
255
256////////////////////////////////////////////////////////////////////////////////
257/// add signal training event
258
259void TMVA::DataLoader::AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
260{
261 AddEvent( className, Types::kTraining, event, weight );
262}
263
264////////////////////////////////////////////////////////////////////////////////
265/// add signal test event
266
267void TMVA::DataLoader::AddTestEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
268{
269 AddEvent( className, Types::kTesting, event, weight );
270}
271
272////////////////////////////////////////////////////////////////////////////////
273/// add event
274/// vector event : the order of values is: variables + targets + spectators
275
277 const std::vector<Double_t>& event, Double_t weight )
278{
279 ClassInfo* theClass = DefaultDataSetInfo().AddClass(className); // returns class (creates it if necessary)
280 UInt_t clIndex = theClass->GetNumber();
281
282
283 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
284 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
285 fAnalysisType = Types::kMulticlass;
286
287
288 if (clIndex>=fTrainAssignTree.size()) {
289 fTrainAssignTree.resize(clIndex+1, 0);
290 fTestAssignTree.resize(clIndex+1, 0);
291 }
292
293 if (fTrainAssignTree[clIndex]==0) { // does not exist yet
294 fTrainAssignTree[clIndex] = CreateEventAssignTrees( TString::Format("TrainAssignTree_%s", className.Data()).Data() );
295 fTestAssignTree[clIndex] = CreateEventAssignTrees( TString::Format("TestAssignTree_%s", className.Data()).Data() );
296 }
297
298 fATreeType = clIndex;
299 fATreeWeight = weight;
300 if (event.size() > fATreeEvent.size()) {
301 Error("AddEvent",
302 "Number of variables defined through DataLoader::AddVariable (%zu) is inconsistent"
303 " with number of variables given to DataLoader::Add*Event (%zu)."
304 " Please check your variable definitions and statement ordering."
305 " This event will not be added.", fATreeEvent.size(), event.size());
306 return;
307 }
308 for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
309
310 if(tt==Types::kTraining) fTrainAssignTree[clIndex]->Fill();
311 else fTestAssignTree[clIndex]->Fill();
312
313}
314
315////////////////////////////////////////////////////////////////////////////////
316///
317
319{
320 return fTrainAssignTree[clIndex]!=0;
321}
322
323////////////////////////////////////////////////////////////////////////////////
324/// assign event-wise local trees to data set
325
327{
328 UInt_t size = fTrainAssignTree.size();
329 for(UInt_t i=0; i<size; i++) {
330 if(!UserAssignEvents(i)) continue;
331 const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
332 SetWeightExpression( "weight", className );
333 AddTree(fTrainAssignTree[i], className, 1.0, TCut(""), Types::kTraining );
334 AddTree(fTestAssignTree[i], className, 1.0, TCut(""), Types::kTesting );
335 }
336}
337
338////////////////////////////////////////////////////////////////////////////////
339/// number of signal events (used to compute significance)
340
341void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
342 const TCut& cut, const TString& treetype )
343{
346 if (tmpTreeType.Contains( "train" ) && tmpTreeType.Contains( "test" )) tt = Types::kMaxTreeType;
347 else if (tmpTreeType.Contains( "train" )) tt = Types::kTraining;
348 else if (tmpTreeType.Contains( "test" )) tt = Types::kTesting;
349 else {
350 Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
351 << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;
352 }
353 AddTree( tree, className, weight, cut, tt );
354}
355
356////////////////////////////////////////////////////////////////////////////////
357
358void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
359 const TCut& cut, Types::ETreeType tt )
360{
361 if(!tree)
362 Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;
363
364 DefaultDataSetInfo().AddClass( className );
365
366 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
367 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
368 fAnalysisType = Types::kMulticlass;
369
370 Log() << kINFO<< "Add Tree " << tree->GetName() << " of type " << className
371 << " with " << tree->GetEntries() << " events" << Endl;
372 DataInput().AddTree( tree, className, weight, cut, tt );
373}
374
375////////////////////////////////////////////////////////////////////////////////
376/// number of signal events (used to compute significance)
377
379{
380 AddTree( signal, "Signal", weight, TCut(""), treetype );
381}
382
383////////////////////////////////////////////////////////////////////////////////
384/// add signal tree from text file
385
387{
388 // create trees from these ascii files
389 TTree* signalTree = new TTree( "TreeS", "Tree (S)" );
390 signalTree->ReadFile( datFileS );
391
392 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file : \""
393 << datFileS << Endl;
394
395 // number of signal events (used to compute significance)
396 AddTree( signalTree, "Signal", weight, TCut(""), treetype );
397}
398
399////////////////////////////////////////////////////////////////////////////////
400
402{
403 AddTree( signal, "Signal", weight, TCut(""), treetype );
404}
405
406////////////////////////////////////////////////////////////////////////////////
407/// number of signal events (used to compute significance)
408
410{
411 AddTree( signal, "Background", weight, TCut(""), treetype );
412}
413
414////////////////////////////////////////////////////////////////////////////////
415/// add background tree from text file
416
418{
419 // create trees from these ascii files
420 TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );
421 bkgTree->ReadFile( datFileB );
422
423 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file : \""
424 << datFileB << Endl;
425
426 // number of signal events (used to compute significance)
427 AddTree( bkgTree, "Background", weight, TCut(""), treetype );
428}
429
430////////////////////////////////////////////////////////////////////////////////
431
433{
434 AddTree( signal, "Background", weight, TCut(""), treetype );
435}
436
437////////////////////////////////////////////////////////////////////////////////
438
440{
441 AddTree( tree, "Signal", weight );
442}
443
444////////////////////////////////////////////////////////////////////////////////
445
447{
448 AddTree( tree, "Background", weight );
449}
450
451////////////////////////////////////////////////////////////////////////////////
452/// set background tree
453
454void TMVA::DataLoader::SetTree( TTree* tree, const TString& className, Double_t weight )
455{
456 AddTree( tree, className, weight, TCut(""), Types::kMaxTreeType );
457}
458
459////////////////////////////////////////////////////////////////////////////////
460/// define the input trees for signal and background; no cuts are applied
461
468
469////////////////////////////////////////////////////////////////////////////////
470
473{
474 DataInput().AddTree( datFileS, "Signal", signalWeight );
475 DataInput().AddTree( datFileB, "Background", backgroundWeight );
476}
477
478////////////////////////////////////////////////////////////////////////////////
479/// define the input trees for signal and background from single input tree,
480/// containing both signal and background events distinguished by the type
481/// identifiers: SigCut and BgCut
482
484{
485 AddTree( inputTree, "Signal", 1.0, SigCut, Types::kMaxTreeType );
486 AddTree( inputTree, "Background", 1.0, BgCut , Types::kMaxTreeType );
487}
488
489////////////////////////////////////////////////////////////////////////////////
490/// user inserts discriminating variable in data set info
491
492void TMVA::DataLoader::AddVariable( const TString& expression, const TString& title, const TString& unit,
493 char type, Double_t min, Double_t max )
494{
495 DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
496}
497
498////////////////////////////////////////////////////////////////////////////////
499/// user inserts discriminating variable in data set info
500
501void TMVA::DataLoader::AddVariable( const TString& expression, char type,
502 Double_t min, Double_t max )
503{
504 DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );
505}
506
507////////////////////////////////////////////////////////////////////////////////
508/// user inserts discriminating array of variables in data set info
509/// in case input tree provides an array of values
510
511void TMVA::DataLoader::AddVariablesArray(const TString &expression, int size, char type,
512 Double_t min, Double_t max)
513{
514 DefaultDataSetInfo().AddVariablesArray(expression, size, "", "", min, max, type);
515}
516////////////////////////////////////////////////////////////////////////////////
517/// user inserts target in data set info
518
519void TMVA::DataLoader::AddTarget( const TString& expression, const TString& title, const TString& unit,
520 Double_t min, Double_t max )
521{
522 if( fAnalysisType == Types::kNoAnalysisType )
523 fAnalysisType = Types::kRegression;
524
525 DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
526}
527
528////////////////////////////////////////////////////////////////////////////////
529/// user inserts target in data set info
530
531void TMVA::DataLoader::AddSpectator( const TString& expression, const TString& title, const TString& unit,
532 Double_t min, Double_t max )
533{
534 DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
535}
536
537////////////////////////////////////////////////////////////////////////////////
538/// default creation
539
541{
542 return AddDataSet( fName );
543}
544
545////////////////////////////////////////////////////////////////////////////////
546/// fill input variables in data set
547
549{
550 for (std::vector<TString>::iterator it=theVariables->begin();
551 it!=theVariables->end(); ++it) AddVariable(*it);
552}
553
554////////////////////////////////////////////////////////////////////////////////
555
557{
558 DefaultDataSetInfo().SetWeightExpression(variable, "Signal");
559}
560
561////////////////////////////////////////////////////////////////////////////////
562
564{
565 DefaultDataSetInfo().SetWeightExpression(variable, "Background");
566}
567
568////////////////////////////////////////////////////////////////////////////////
569
570void TMVA::DataLoader::SetWeightExpression( const TString& variable, const TString& className )
571{
572 //Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;
573 if (className=="") {
574 SetSignalWeightExpression(variable);
575 SetBackgroundWeightExpression(variable);
576 }
577 else DefaultDataSetInfo().SetWeightExpression( variable, className );
578}
579
580////////////////////////////////////////////////////////////////////////////////
581
582void TMVA::DataLoader::SetCut( const TString& cut, const TString& className ) {
583 SetCut( TCut(cut), className );
584}
585
586////////////////////////////////////////////////////////////////////////////////
587
588void TMVA::DataLoader::SetCut( const TCut& cut, const TString& className )
589{
590 DefaultDataSetInfo().SetCut( cut, className );
591}
592
593////////////////////////////////////////////////////////////////////////////////
594
595void TMVA::DataLoader::AddCut( const TString& cut, const TString& className )
596{
597 AddCut( TCut(cut), className );
598}
599
600////////////////////////////////////////////////////////////////////////////////
601void TMVA::DataLoader::AddCut( const TCut& cut, const TString& className )
602{
603 DefaultDataSetInfo().AddCut( cut, className );
604}
605
606////////////////////////////////////////////////////////////////////////////////
607/// prepare the training and test trees
608
611 const TString& otherOpt )
612{
613 SetInputTreesFromEventAssignTrees();
614
615 AddCut( cut );
616
617 DefaultDataSetInfo().SetSplitOptions( TString::Format("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
618 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()).Data() );
619}
620
621////////////////////////////////////////////////////////////////////////////////
622/// prepare the training and test trees
623/// kept for backward compatibility
624
626{
627 SetInputTreesFromEventAssignTrees();
628
629 AddCut( cut );
630
631 DefaultDataSetInfo().SetSplitOptions( TString::Format("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
632 Ntrain, Ntrain, Ntest, Ntest).Data() );
633}
634
635////////////////////////////////////////////////////////////////////////////////
636/// prepare the training and test trees
637/// -> same cuts for signal and background
638
640{
641 SetInputTreesFromEventAssignTrees();
642
643 DefaultDataSetInfo().PrintClasses();
644 AddCut( cut );
645 DefaultDataSetInfo().SetSplitOptions( opt );
646}
647
648////////////////////////////////////////////////////////////////////////////////
649/// prepare the training and test trees
650
652{
653 // if event-wise data assignment, add local trees to dataset first
654 SetInputTreesFromEventAssignTrees();
655
656 //Log() << kINFO <<"Preparing trees for training and testing..."<< Endl;
657 AddCut( sigcut, "Signal" );
658 AddCut( bkgcut, "Background" );
659
660 DefaultDataSetInfo().SetSplitOptions( splitOpt );
661}
662
663////////////////////////////////////////////////////////////////////////////////
664/// Function required to split the training and testing datasets into a
665/// number of folds. Required by the CrossValidation and HyperParameterOptimisation
666/// classes. The option to split the training dataset into a training set and
667/// a validation set is implemented but not currently used.
668
670{
671 s.MakeKFoldDataSet( DefaultDataSetInfo() );
672}
673
674////////////////////////////////////////////////////////////////////////////////
675/// Function for assigning the correct folds to the testing or training set.
676
681
682
683////////////////////////////////////////////////////////////////////////////////
684/// Recombines the dataset. The precise semantics depend on the actual split.
685///
686/// Similar to the inverse operation of `MakeKFoldDataSet` but _will_ differ.
687/// See documentation for each particular split for more information.
688///
689
694
695////////////////////////////////////////////////////////////////////////////////
696/// Copy method use in VI and CV
697
704
705////////////////////////////////////////////////////////////////////////////////
706///Loading Dataset from DataInputHandler for subseed
707
709{
710 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();++treeinfo)
711 {
712 des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
713 }
714
715 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();++treeinfo)
716 {
717 des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
718 }
719}
720
721////////////////////////////////////////////////////////////////////////////////
722/// returns the correlation matrix of datasets
723
725{
726 const TMatrixD * m = DefaultDataSetInfo().CorrelationMatrix(className);
727 return DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
728 "CorrelationMatrix"+className, "Correlation Matrix ("+className+")");
729}
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:208
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
const_iterator begin() const
const_iterator end() const
A specialized string object used for TTree selections.
Definition TCut.h:25
Service class for 2-D histogram classes.
Definition TH2.h:30
Class that contains all the information of a class.
Definition ClassInfo.h:49
MsgLogger * fLogger
! message logger
virtual void RecombineKFoldDataSet(DataSetInfo &dsi, Types::ETreeType tt=Types::kTraining)
Definition CvSplit.cxx:112
virtual void MakeKFoldDataSet(DataSetInfo &dsi)=0
virtual void PrepareFoldDataSet(DataSetInfo &dsi, UInt_t foldNumber, Types::ETreeType tt)
Set training and test set vectors of dataset described by dsi.
Definition CvSplit.cxx:55
Class that contains all the data information.
DataInputHandler * fDataInputHandler
->
Definition DataLoader.h:189
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
DataSetInfo & AddDataSet(DataSetInfo &)
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal training event
void SetTree(TTree *tree, const TString &className, Double_t weight)
deprecated
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
DataSetInfo & DefaultDataSetInfo()
default creation
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
DataSetManager * fDataSetManager
Definition DataLoader.h:186
DataLoader * MakeCopy(TString name)
Copy method use in VI and CV.
void SetSignalWeightExpression(const TString &variable)
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
void SetWeightExpression(const TString &variable, const TString &className="")
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
void SetBackgroundWeightExpression(const TString &variable)
void AddCut(const TString &cut, const TString &className="")
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators
DataLoader(TString thedlName="default")
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
DataSetInfo & GetDataSetInfo()
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Bool_t UserAssignEvents(UInt_t clIndex)
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal test event
void SetSignalTree(TTree *signal, Double_t weight=1.0)
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
virtual ~DataLoader()
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
void SetInputVariables(std::vector< TString > *theVariables)
deprecated
void SetCut(const TString &cut, const TString &className="")
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
void SetSource(const std::string &source)
Definition MsgLogger.h:68
@ kMulticlass
Definition Types.h:129
@ kNoAnalysisType
Definition Types.h:130
@ kRegression
Definition Types.h:128
@ kMaxTreeType
also used as temporary storage for trees not yet assigned for testing;training...
Definition Types.h:145
@ kTraining
Definition Types.h:143
TMVA::DataLoader * VarianceThreshold(Double_t threshold)
Computes variance of all the variables and returns a new DataLoader with the selected variables whose...
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition TNamed.cxx:149
Basic string class.
Definition TString.h:138
const char * Data() const
Definition TString.h:384
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
A TTree represents a columnar dataset.
Definition TTree.h:89
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
TMarker m
Definition textangle.C:8
auto * tt
Definition textangle.C:16