Logo ROOT   6.10/09
Reference Guide
TMVAMulticlass.C File Reference

Detailed Description

View in nbviewer | Open in SWAN

This macro provides a simple example of training and testing TMVA multiclass classification.

Processing /mnt/build/workspace/root-makedoc-v610/rootspi/rdoc/src/v6-10-00-patches/tutorials/tmva/TMVAMulticlass.C...
==> Start TMVAMulticlass
Creating testdata....
... event: 0 (2000)
... event: 1000 (2000)
======> EVENT:0
var1 = -1.14361
var2 = -0.822373
var3 = -0.395426
var4 = -0.529427
created tree: TreeS
... event: 0 (2000)
... event: 1000 (2000)
======> EVENT:0
var1 = -1.54361
var2 = -1.42237
var3 = -1.39543
var4 = -2.02943
created tree: TreeB0
... event: 0 (2000)
... event: 1000 (2000)
======> EVENT:0
var1 = -1.54361
var2 = -0.822373
var3 = -0.395426
var4 = -2.02943
created tree: TreeB1
======> EVENT:0
var1 = 0.463304
var2 = 1.37192
var3 = -1.16769
var4 = -1.77551
created tree: TreeB2
created data file: tmva_example_multiple_background.root
created tmva_example_multiple_background.root for tests of the multiclass features
DataSetInfo : [dataset] : Added class "Signal"
: Add Tree TreeS of type Signal with 2000 events
DataSetInfo : [dataset] : Added class "bg0"
: Add Tree TreeB0 of type bg0 with 2000 events
DataSetInfo : [dataset] : Added class "bg1"
: Add Tree TreeB1 of type bg1 with 2000 events
DataSetInfo : [dataset] : Added class "bg2"
: Add Tree TreeB2 of type bg2 with 2000 events
: Dataset[dataset] : Class index : 0 name : Signal
: Dataset[dataset] : Class index : 1 name : bg0
: Dataset[dataset] : Class index : 2 name : bg1
: Dataset[dataset] : Class index : 3 name : bg2
Factory : Booking method: BDTG
:
: the option *InverseBoostNegWeights* does not exist for BoostType=Grad --> change
: to new default for GradBoost *Pray*
DataSetFactory : [dataset] : Number of events in input trees
:
:
:
:
: Number of training and testing events
: ---------------------------------------------------------------------------
: Signal -- training events : 1000
: Signal -- testing events : 1000
: Signal -- training and testing events: 2000
: bg0 -- training events : 1000
: bg0 -- testing events : 1000
: bg0 -- training and testing events: 2000
: bg1 -- training events : 1000
: bg1 -- testing events : 1000
: bg1 -- training and testing events: 2000
: bg2 -- training events : 1000
: bg2 -- testing events : 1000
: bg2 -- training and testing events: 2000
:
DataSetInfo : Correlation matrix (Signal):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.380 +0.597 +0.819
: var2: +0.380 +1.000 +0.706 +0.744
: var3: +0.597 +0.706 +1.000 +0.853
: var4: +0.819 +0.744 +0.853 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (bg0):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.406 +0.621 +0.837
: var2: +0.406 +1.000 +0.696 +0.727
: var3: +0.621 +0.696 +1.000 +0.853
: var4: +0.837 +0.727 +0.853 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (bg1):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.371 +0.602 +0.831
: var2: +0.371 +1.000 +0.699 +0.721
: var3: +0.602 +0.699 +1.000 +0.847
: var4: +0.831 +0.721 +0.847 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (bg2):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 -0.660 +0.034 -0.012
: var2: -0.660 +1.000 +0.007 -0.004
: var3: +0.034 +0.007 +1.000 -0.037
: var4: -0.012 -0.004 -0.037 +1.000
: ----------------------------------------
DataSetFactory : [dataset] :
:
Factory : Booking method: MLP
:
MLP : Building Network.
: Initializing weights
Factory : Train all methods
Factory : [dataset] : Create Transformation "I" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "D" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "P" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "G" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "D" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.060705 1.0014 [ -4.0592 3.5808 ]
: var2: 0.31440 1.0501 [ -3.6891 3.7877 ]
: var3: 0.12000 1.1225 [ -3.6148 4.5640 ]
: var4: -0.070020 1.2598 [ -4.8486 5.0412 ]
: -----------------------------------------------------------
: Preparing the Decorrelation transformation...
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.096903 1.0000 [ -3.5985 2.9977 ]
: var2: 0.35671 1.0000 [ -3.3391 3.5408 ]
: var3: 0.070223 1.0000 [ -2.8950 3.1502 ]
: var4: -0.20167 1.0000 [ -3.2998 2.8753 ]
: -----------------------------------------------------------
: Preparing the Principle Component (PCA) transformation...
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 1.8262e-10 1.7916 [ -7.2781 7.8235 ]
: var2:-4.0762e-10 0.89644 [ -3.2734 2.6837 ]
: var3:-1.3316e-10 0.74817 [ -2.4103 2.7078 ]
: var4:-1.5119e-10 0.61596 [ -2.2644 1.5471 ]
: -----------------------------------------------------------
: Preparing the Gaussian transformation...
: Preparing the Decorrelation transformation...
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.0081281 1.0000 [ -2.6178 6.0982 ]
: var2: 0.010257 1.0000 [ -2.8460 6.2789 ]
: var3: 0.0095035 1.0000 [ -3.0077 5.8864 ]
: var4: 0.0074780 1.0000 [ -3.0452 5.6560 ]
: -----------------------------------------------------------
: Ranking input variables (method unspecific)...
Factory : Train method: BDTG for Multiclass classification
:
BDTG : #events: (reweighted) sig: 2000 bkg: 2000
: #events: (unweighted) sig: 1000 bkg: 3000
: Training 1000 Decision Trees ... patience please
: Elapsed time for training with 4000 events: 5.77 sec
: Dataset[dataset] : Create results for training
: Dataset[dataset] : Multiclass evaluation of BDTG on training sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 1.39 sec
: Creating multiclass response histograms...
: Creating xml weight file: dataset/weights/TMVAMulticlass_BDTG.weights.xml
: Creating standalone class: dataset/weights/TMVAMulticlass_BDTG.class.C
: TMVAMulticlass.root:/dataset/Method_BDTG/BDTG
Factory : Training finished
:
Factory : Train method: MLP for Multiclass classification
:
: Training Network
:
: Elapsed time for training with 4000 events: 20 sec
: Dataset[dataset] : Create results for training
: Dataset[dataset] : Multiclass evaluation of MLP on training sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 0.018 sec
: Creating multiclass response histograms...
: Creating xml weight file: dataset/weights/TMVAMulticlass_MLP.weights.xml
: Creating standalone class: dataset/weights/TMVAMulticlass_MLP.class.C
: Write special histos to file: TMVAMulticlass.root:/dataset/Method_MLP/MLP
Factory : Training finished
:
: Ranking input variables (method specific)...
BDTG : Ranking result (top variable is best ranked)
: --------------------------------------
: Rank : Variable : Variable Importance
: --------------------------------------
: 1 : var4 : 3.063e-01
: 2 : var1 : 2.578e-01
: 3 : var2 : 2.400e-01
: 4 : var3 : 1.959e-01
: --------------------------------------
MLP : Ranking result (top variable is best ranked)
: -----------------------------
: Rank : Variable : Importance
: -----------------------------
: 1 : var4 : 2.946e+01
: 2 : var1 : 1.697e+01
: 3 : var2 : 1.033e+01
: 4 : var3 : 6.599e+00
: -----------------------------
Factory : === Destroy and recreate all methods via weight files for testing ===
:
MLP : Building Network.
: Initializing weights
Factory : Test all methods
Factory : Test method: BDTG for Multiclass classification performance
:
: Dataset[dataset] : Create results for testing
: Dataset[dataset] : Multiclass evaluation of BDTG on testing sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 0.752 sec
: Creating multiclass response histograms...
Factory : Test method: MLP for Multiclass classification performance
:
: Dataset[dataset] : Create results for testing
: Dataset[dataset] : Multiclass evaluation of MLP on testing sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 0.0193 sec
: Creating multiclass response histograms...
Factory : Evaluate all methods
: Evaluate multiclass classification method: BDTG
TFHandler_BDTG : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.057094 1.0236 [ -3.6592 3.2749 ]
: var2: 0.31579 1.0607 [ -3.6952 3.7877 ]
: var3: 0.11645 1.1227 [ -4.5727 4.5640 ]
: var4: -0.079113 1.2819 [ -4.7970 4.2221 ]
: -----------------------------------------------------------
: Evaluate multiclass classification method: MLP
TFHandler_MLP : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.057094 1.0236 [ -3.6592 3.2749 ]
: var2: 0.31579 1.0607 [ -3.6952 3.7877 ]
: var3: 0.11645 1.1227 [ -4.5727 4.5640 ]
: var4: -0.079113 1.2819 [ -4.7970 4.2221 ]
: -----------------------------------------------------------
: 1-vs-rest performance metrics per class
: -------------------------------------------------------------------------------------------------------
:
: Considers the listed class as signal and the other classes
: as background, reporting the resulting binary performance.
:
: Dataset MVA Method Sig eff Sig eff Sig eff
: Name: / Class: ROC AUC @B=0.01 @B=0.10 @B=0.30
:
: dataset BDTG
: Signal 0.964 0.419 0.905 0.988
: bg0 0.881 0.144 0.626 0.889
: bg1 0.930 0.413 0.771 0.945
: bg2 0.955 0.555 0.870 0.972
:
: dataset MLP
: Signal 0.972 0.662 0.933 0.993
: bg0 0.933 0.333 0.786 0.954
: bg1 0.965 0.582 0.898 0.991
: bg2 0.978 0.648 0.954 0.996
: -------------------------------------------------------------------------------------------------------
:
:
: Confusion matrices for all methods
: -------------------------------------------------------------------------------------------------------
:
: Does a binary comparison between the two classes given by a
: particular row-column combination. In each case, the class
: given by the row is considered signal while the class given
: by the column index is considered background.
:
: Showing confusion matrix for method : BDTG
: (Signal Efficiency for Background Efficiency 0.01%)
: Signal bg0 bg1 bg2
: Signal - 0.420 0.691 0.272
: bg0 0.366 - 0.075 0.611
: bg1 0.718 0.267 - 0.475
: bg2 0.690 0.508 0.513 -
:
: (Signal Efficiency for Background Efficiency 0.10%)
: Signal bg0 bg1 bg2
: Signal - 0.872 0.982 0.867
: bg0 0.824 - 0.360 0.880
: bg1 0.971 0.646 - 0.757
: bg2 0.935 0.868 0.810 -
:
: (Signal Efficiency for Background Efficiency 0.30%)
: Signal bg0 bg1 bg2
: Signal - 0.960 0.999 0.998
: bg0 0.960 - 0.696 0.962
: bg1 0.995 0.860 - 0.943
: bg2 0.990 0.974 0.940 -
:
: Showing confusion matrix for method : MLP
: (Signal Efficiency for Background Efficiency 0.01%)
: Signal bg0 bg1 bg2
: Signal - 0.490 0.940 0.662
: bg0 0.400 - 0.175 0.676
: bg1 0.912 0.353 - 0.674
: bg2 0.659 0.634 0.644 -
:
: (Signal Efficiency for Background Efficiency 0.10%)
: Signal bg0 bg1 bg2
: Signal - 0.876 0.993 0.886
: bg0 0.742 - 0.671 0.945
: bg1 0.995 0.767 - 0.911
: bg2 0.952 0.970 0.944 -
:
: (Signal Efficiency for Background Efficiency 0.30%)
: Signal bg0 bg1 bg2
: Signal - 0.966 0.995 0.994
: bg0 0.946 - 0.916 0.992
: bg1 0.996 0.940 - 0.995
: bg2 0.995 0.996 0.996 -
:
: -------------------------------------------------------------------------------------------------------
:
Dataset:dataset : Created tree 'TestTree' with 4000 events
:
Dataset:dataset : Created tree 'TrainTree' with 4000 events
:
Factory : Thank you for using TMVA!
: For citation information, please visit: http://tmva.sf.net/citeTMVA.html
==> Wrote root file: TMVAMulticlass.root
==> TMVAMulticlass is done!
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include "TFile.h"
#include "TTree.h"
#include "TString.h"
#include "TSystem.h"
#include "TROOT.h"
#include "TMVA/Tools.h"
#include "TMVA/Factory.h"
using namespace TMVA;
/// Train, test and evaluate TMVA multiclass classifiers on a four-class
/// example data set (classes "Signal", "bg0", "bg1" and "bg2").
///
/// The input is read from ./tmva_example_multiple_background.root; when that
/// file is not accessible it is generated first by running
/// create_MultipleBackground(2000) from the tutorial macro tmva/createData.C.
/// All results are written to TMVAMulticlass.root and, unless ROOT runs in
/// batch mode, TMVAMultiClassGui is launched on that file afterwards.
///
/// \param myMethodList  Comma-separated names of the methods to run. When
///                      empty, the methods enabled by default in `Use` (MLP
///                      and BDTG) are run. If a name is not a key of `Use`,
///                      the known names are printed and the function returns
///                      without doing anything else.
///
/// The process is terminated with exit(1) when the output file cannot be
/// created, the data file cannot be opened, or one of the expected input
/// trees is missing.
void TMVAMulticlass( TString myMethodList = "" )
{
   // This loads the library
   // to get access to the GUI and all tmva macros
   //
   // TString tmva_dir(TString(gRootDir) + "/tmva");
   // if(gSystem->Getenv("TMVASYS"))
   // tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   // gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   // gROOT->ProcessLine(".L TMVAMultiClassGui.C");

   //---------------------------------------------------------------
   // Default MVA methods to be trained + tested (1 = enabled, 0 = disabled)
   std::map<std::string,int> Use;
   Use["MLP"] = 1;
   Use["BDTG"] = 1;
   Use["DNN_CPU"] = 0;
   Use["FDA_GA"] = 0;
   Use["PDEFoam"] = 0;
   //---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAMulticlass" << std::endl;

   // An explicit method list replaces the defaults: switch everything off,
   // then switch on exactly the requested methods.
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); ++it) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); ++it) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new root output file.
   TString outfileName = "TMVAMulticlass.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   if (!outputFile) {
      // Without this check the null handle would be dereferenced further down.
      std::cout << "ERROR: could not create output file " << outfileName << std::endl;
      exit(1);
   }

   TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   TMVA::DataLoader *dataloader=new TMVA::DataLoader("dataset");

   // Input variables used for the classification
   dataloader->AddVariable( "var1", 'F' );
   dataloader->AddVariable( "var2", "Variable 2", "", 'F' );
   dataloader->AddVariable( "var3", "Variable 3", "units", 'F' );
   dataloader->AddVariable( "var4", "Variable 4", "units", 'F' );

   TFile *input(0);
   TString fname = "./tmva_example_multiple_background.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find the file in the local directory
      std::cout << "--- TMVAMulticlass : Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   else {
      // otherwise generate the example data with the tutorial macro
      std::cout << "Creating testdata...." << std::endl;
      TString createDataMacro = gROOT->GetTutorialDir() + "/tmva/createData.C";
      gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data()));
      gROOT->ProcessLine("create_MultipleBackground(2000)");
      std::cout << " created tmva_example_multiple_background.root for tests of the multiclass features"<<std::endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }

   // dynamic_cast yields a null pointer both when the key is absent and when
   // the stored object is not a TTree, so one check covers both failures.
   TTree *signalTree  = dynamic_cast<TTree*>(input->Get("TreeS"));
   TTree *background0 = dynamic_cast<TTree*>(input->Get("TreeB0"));
   TTree *background1 = dynamic_cast<TTree*>(input->Get("TreeB1"));
   TTree *background2 = dynamic_cast<TTree*>(input->Get("TreeB2"));
   if (!signalTree || !background0 || !background1 || !background2) {
      std::cout << "ERROR: data file " << fname
                << " does not contain all of the trees TreeS, TreeB0, TreeB1, TreeB2" << std::endl;
      exit(1);
   }

   // Switch the current directory back to the output file
   gROOT->cd( outfileName+TString(":/") );

   // One input tree per class
   dataloader->AddTree (signalTree,"Signal");
   dataloader->AddTree (background0,"bg0");
   dataloader->AddTree (background1,"bg1");
   dataloader->AddTree (background2,"bg2");

   dataloader->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );

   if (Use["BDTG"]) // gradient boosted decision trees
      factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.50:nCuts=20:MaxDepth=2");
   if (Use["MLP"]) // neural network
      factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
   if (Use["FDA_GA"]) // functional discriminant with GA minimizer
      factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
   if (Use["PDEFoam"]) // PDE-Foam approach
      factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["DNN_CPU"]) {
      // The option string is assembled from a layout part and two training
      // phases that are joined with '|' into a single TrainingStrategy.
      TString layoutString("Layout=TANH|100,TANH|50,TANH|10,LINEAR");
      TString training0("LearningRate=1e-1, Momentum=0.5, Repetitions=1, ConvergenceSteps=10,"
                        " BatchSize=256, TestRepetitions=10, Multithreading=True");
      TString training1("LearningRate=1e-2, Momentum=0.0, Repetitions=1, ConvergenceSteps=10,"
                        " BatchSize=256, TestRepetitions=7, Multithreading=True");
      TString trainingStrategyString("TrainingStrategy=");
      trainingStrategyString += training0 + "|" + training1;
      TString nnOptions("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
                        "WeightInitialization=XAVIERUNIFORM:Architecture=CPU");
      nnOptions.Append(":");
      nnOptions.Append(layoutString);
      nnOptions.Append(":");
      nnOptions.Append(trainingStrategyString);
      factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN_CPU", nnOptions);
   }

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAMulticlass is done!" << std::endl;

   delete factory;
   delete dataloader;

   // Release the file handles only after the factory and the data loader are
   // gone, so nothing that referred to the files or the input trees is left.
   delete outputFile;
   delete input;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAMultiClassGui( outfileName );
}
int main( int argc, char** argv )
{
// Select methods (don't look at this code - not of interest)
TString methodList;
for (int i=1; i<argc; i++) {
TString regMethod(argv[i]);
if(regMethod=="-b" || regMethod=="--batch") continue;
if (!methodList.IsNull()) methodList += TString(",");
methodList += regMethod;
}
TMVAMulticlass(methodList);
return 0;
}
Author
Andreas Hoecker

Definition in file TMVAMulticlass.C.