Logo ROOT  
Reference Guide
TMVARegression.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_tmva
3/// \notebook -nodraw
4/// This macro provides examples for the training and testing of the
5/// TMVA regression methods.
6///
7/// The input data is a toy-MC sample consisting of four Gaussian-distributed
8/// and linearly correlated input variables.
9///
10/// The methods to be used can be switched on and off by means of booleans, or
11/// via the prompt command, for example:
12///
13/// root -l TMVARegression.C\(\"LD,MLP\"\)
14///
15/// (note that the backslashes are mandatory)
16/// If no method given, a default set is used.
17///
18/// The output file "TMVAReg.root" can be analysed with the use of dedicated
19/// macros (simply say: root -l <macro.C>), which can be conveniently
20/// invoked through a GUI that will appear at the end of the run of this macro.
21/// - Project : TMVA - a Root-integrated toolkit for multivariate data analysis
22/// - Package : TMVA
23/// - Root Macro: TMVARegression
24///
25/// \macro_output
26/// \macro_code
27/// \author Andreas Hoecker
28
29#include <cstdlib>
30#include <iostream>
31#include <map>
32#include <string>
33
34#include "TChain.h"
35#include "TFile.h"
36#include "TTree.h"
37#include "TString.h"
38#include "TObjString.h"
39#include "TSystem.h"
40#include "TROOT.h"
41
42#include "TMVA/Tools.h"
43#include "TMVA/Factory.h"
44#include "TMVA/DataLoader.h"
45#include "TMVA/TMVARegGui.h"
46
47
48using namespace TMVA;
49
50void TMVARegression( TString myMethodList = "" )
51{
52 // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
53 // if you use your private .rootrc, or run from a different directory, please copy the
54 // corresponding lines from .rootrc
55
56 // methods to be processed can be given as an argument; use format:
57 //
58 // mylinux~> root -l TMVARegression.C\‍(\"myMethod1,myMethod2,myMethod3\"\‍)
59 //
60
61 //---------------------------------------------------------------
62 // This loads the library
64
65
66
67 // Default MVA methods to be trained + tested
68 std::map<std::string,int> Use;
69
70 // Mutidimensional likelihood and Nearest-Neighbour methods
71 Use["PDERS"] = 0;
72 Use["PDEFoam"] = 1;
73 Use["KNN"] = 1;
74 //
75 // Linear Discriminant Analysis
76 Use["LD"] = 1;
77 //
78 // Function Discriminant analysis
79 Use["FDA_GA"] = 0;
80 Use["FDA_MC"] = 0;
81 Use["FDA_MT"] = 0;
82 Use["FDA_GAMT"] = 0;
83 //
84 // Neural Network
85 Use["MLP"] = 0;
86 // Deep neural network (with CPU or GPU)
87#ifdef R__HAS_TMVAGPU
88 Use["DNN_GPU"] = 1;
89 Use["DNN_CPU"] = 0;
90#else
91 Use["DNN_GPU"] = 0;
92#ifdef R__HAS_TMVACPU
93 Use["DNN_CPU"] = 1;
94#else
95 Use["DNN_CPU"] = 0;
96#endif
97#endif
98 //
99 // Support Vector Machine
100 Use["SVM"] = 0;
101 //
102 // Boosted Decision Trees
103 Use["BDT"] = 0;
104 Use["BDTG"] = 1;
105 // ---------------------------------------------------------------
106
107 std::cout << std::endl;
108 std::cout << "==> Start TMVARegression" << std::endl;
109
110 // Select methods (don't look at this code - not of interest)
111 if (myMethodList != "") {
112 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
113
114 std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
115 for (UInt_t i=0; i<mlist.size(); i++) {
116 std::string regMethod(mlist[i].Data());
117
118 if (Use.find(regMethod) == Use.end()) {
119 std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
120 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
121 std::cout << std::endl;
122 return;
123 }
124 Use[regMethod] = 1;
125 }
126 }
127
128 // --------------------------------------------------------------------------------------------------
129
130 // Here the preparation phase begins
131
132 // Create a new root output file
133 TString outfileName( "TMVAReg.root" );
134 TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
135
136 // Create the factory object. Later you can choose the methods
137 // whose performance you'd like to investigate. The factory will
138 // then run the performance analysis for you.
139 //
140 // The first argument is the base of the name of all the
141 // weightfiles in the directory weight/
142 //
143 // The second argument is the output file for the training results
144 // All TMVA output can be suppressed by removing the "!" (not) in
145 // front of the "Silent" argument in the option string
146 TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile,
147 "!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression" );
148
149
150 TMVA::DataLoader *dataloader=new TMVA::DataLoader("dataset");
151 // If you wish to modify default settings
152 // (please check "src/Config.h" to see all available global options)
153 //
154 // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
155 // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
156
157 // Define the input variables that shall be used for the MVA training
158 // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
159 // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
160 dataloader->AddVariable( "var1", "Variable 1", "units", 'F' );
161 dataloader->AddVariable( "var2", "Variable 2", "units", 'F' );
162
163 // You can add so-called "Spectator variables", which are not used in the MVA training,
164 // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
165 // input variables, the response values of all trained MVAs, and the spectator variables
166 dataloader->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' );
167 dataloader->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' );
168
169 // Add the variable carrying the regression target
170 dataloader->AddTarget( "fvalue" );
171
172 // It is also possible to declare additional targets for multi-dimensional regression, ie:
173 // factory->AddTarget( "fvalue2" );
174 // BUT: this is currently ONLY implemented for MLP
175
176 // Read training and test data (see TMVAClassification for reading ASCII files)
177 // load the signal and background event samples from ROOT trees
178 TFile *input(0);
179 TString fname = "./tmva_reg_example.root";
180 if (!gSystem->AccessPathName( fname )) {
181 input = TFile::Open( fname ); // check if file in local directory exists
182 }
183 else {
185 input = TFile::Open("http://root.cern.ch/files/tmva_reg_example.root", "CACHEREAD"); // if not: download from ROOT server
186 }
187 if (!input) {
188 std::cout << "ERROR: could not open data file" << std::endl;
189 exit(1);
190 }
191 std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl;
192
193 // Register the regression tree
194
195 TTree *regTree = (TTree*)input->Get("TreeR");
196
197 // global event weights per tree (see below for setting event-wise weights)
198 Double_t regWeight = 1.0;
199
200 // You can add an arbitrary number of regression trees
201 dataloader->AddRegressionTree( regTree, regWeight );
202
203 // This would set individual event weights (the variables defined in the
204 // expression need to exist in the original TTree)
205 dataloader->SetWeightExpression( "var1", "Regression" );
206
207 // Apply additional cuts on the signal and background samples (can be different)
208 TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
209
210 // tell the DataLoader to use all remaining events in the trees after training for testing:
211 dataloader->PrepareTrainingAndTestTree( mycut,
212 "nTrain_Regression=1000:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );
213 //
214 // dataloader->PrepareTrainingAndTestTree( mycut,
215 // "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );
216
217 // If no numbers of events are given, half of the events in the tree are used
218 // for training, and the other half for testing:
219 //
220 // dataloader->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
221
222 // Book MVA methods
223 //
224 // Please lookup the various method configuration options in the corresponding cxx files, eg:
225 // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
226 // it is possible to preset ranges in the option string in which the cut optimisation should be done:
227 // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
228
229 // PDE - RS method
230 if (Use["PDERS"])
231 factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERS",
232 "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
233 // And the options strings for the MinMax and RMS methods, respectively:
234 //
235 // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
236 // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
237
238 if (Use["PDEFoam"])
239 factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam",
240 "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );
241
242 // K-Nearest Neighbour classifier (KNN)
243 if (Use["KNN"])
244 factory->BookMethod( dataloader, TMVA::Types::kKNN, "KNN",
245 "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
246
247 // Linear discriminant
248 if (Use["LD"])
249 factory->BookMethod( dataloader, TMVA::Types::kLD, "LD",
250 "!H:!V:VarTransform=None" );
251
252 // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
253 if (Use["FDA_MC"])
254 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MC",
255 "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
256
257 if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
258 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA",
259 "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );
260
261 if (Use["FDA_MT"])
262 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MT",
263 "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
264
265 if (Use["FDA_GAMT"])
266 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GAMT",
267 "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
268
269 // Neural network (MLP)
270 if (Use["MLP"])
271 factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
272
273 if (Use["DNN_CPU"] || Use["DNN_GPU"]) {
274
275 TString archOption = Use["DNN_GPU"] ? "GPU" : "CPU";
276
277 TString layoutString("Layout=TANH|50,TANH|50,TANH|50,LINEAR");
278
279
280 TString trainingStrategyString("TrainingStrategy=");
281
282 trainingStrategyString +="LearningRate=1e-3,Momentum=0.3,ConvergenceSteps=20,BatchSize=50,TestRepetitions=1,WeightDecay=0.0,Regularization=None,Optimizer=Adam";
283
284 TString nnOptions("!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=G:WeightInitialization=XAVIERUNIFORM:Architecture=");
285 nnOptions.Append(archOption);
286 nnOptions.Append(":");
287 nnOptions.Append(layoutString);
288 nnOptions.Append(":");
289 nnOptions.Append(trainingStrategyString);
290
291 TString methodName = TString("DNN_") + archOption;
292
293 factory->BookMethod(dataloader, TMVA::Types::kDL, methodName, nnOptions); // NN
294 }
295
296
297
298 // Support Vector Machine
299 if (Use["SVM"])
300 factory->BookMethod( dataloader, TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
301
302 // Boosted Decision Trees
303 if (Use["BDT"])
304 factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT",
305 "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
306
307 if (Use["BDTG"])
308 factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG",
309 "!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:MaxDepth=4" );
310 // --------------------------------------------------------------------------------------------------
311
312 // Now you can tell the factory to train, test, and evaluate the MVAs
313
314 // Train MVAs using the set of training events
315 factory->TrainAllMethods();
316
317 // Evaluate all MVAs using the set of test events
318 factory->TestAllMethods();
319
320 // Evaluate and compare performance of all configured MVAs
321 factory->EvaluateAllMethods();
322
323 // --------------------------------------------------------------
324
325 // Save the output
326 outputFile->Close();
327
328 std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
329 std::cout << "==> TMVARegression is done!" << std::endl;
330
331 delete factory;
332 delete dataloader;
333
334 // Launch the GUI for the root macros
335 if (!gROOT->IsBatch()) TMVA::TMVARegGui( outfileName );
336}
337
338int main( int argc, char** argv )
339{
340 // Select methods (don't look at this code - not of interest)
341 TString methodList;
342 for (int i=1; i<argc; i++) {
343 TString regMethod(argv[i]);
344 if(regMethod=="-b" || regMethod=="--batch") continue;
345 if (!methodList.IsNull()) methodList += TString(",");
346 methodList += regMethod;
347 }
348 TMVARegression(methodList);
349 return 0;
350}
unsigned int UInt_t
Definition: RtypesCore.h:46
double Double_t
Definition: RtypesCore.h:59
#define gROOT
Definition: TROOT.h:404
R__EXTERN TSystem * gSystem
Definition: TSystem.h:559
int main(int argc, char *argv[])
Definition: cef_main.cxx:54
A specialized string object used for TTree selections.
Definition: TCut.h:25
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
static Bool_t SetCacheFileDir(ROOT::Internal::TStringView cacheDir, Bool_t operateDisconnected=kTRUE, Bool_t forceCacheread=kFALSE)
Definition: TFile.h:324
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:4011
void Close(Option_t *option="") override
Close a file.
Definition: TFile.cxx:889
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:524
void AddRegressionTree(TTree *tree, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Definition: DataLoader.h:103
void SetWeightExpression(const TString &variable, const TString &className="")
Definition: DataLoader.cxx:563
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:632
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:512
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:485
This is the main MVA steering class.
Definition: Factory.h:80
void TrainAllMethods()
Iterates through all booked methods and calls training.
Definition: Factory.cxx:1114
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition: Factory.cxx:352
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponi...
Definition: Factory.cxx:1271
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
Definition: Factory.cxx:1376
static Tools & Instance()
Definition: Tools.cxx:71
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings
Definition: Tools.cxx:1199
@ kFDA
Definition: Types.h:92
@ kBDT
Definition: Types.h:86
@ kPDERS
Definition: Types.h:80
@ kPDEFoam
Definition: Types.h:94
@ kSVM
Definition: Types.h:89
@ kKNN
Definition: Types.h:83
@ kMLP
Definition: Types.h:90
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Basic string class.
Definition: TString.h:136
Bool_t IsNull() const
Definition: TString.h:407
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1296
A TTree represents a columnar dataset.
Definition: TTree.h:79
create variable transformations
void TMVARegGui(const char *fName="TMVAReg.root", TString dataset="")
Tools & gTools()