ROOT 6.08/07 Reference Guide
TMVAMulticlass.C File Reference

Detailed Description

This macro provides a simple example for the training and testing of TMVA multiclass classification.
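To reproduce the output shown below, the macro can be executed directly with ROOT; a minimal sketch, assuming the tutorial file has been copied into the working directory (it ships under $ROOTSYS/tutorials/tmva):

  root -l TMVAMulticlass.C

Passing a comma-separated method list as the macro argument, for example root -l 'TMVAMulticlass.C("BDTG,MLP")', restricts the training to that subset of the booked methods.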

Processing /mnt/build/workspace/root-makedoc-v608/rootspi/rdoc/src/v6-08-00-patches/tutorials/tmva/TMVAMulticlass.C...
==> Start TMVAMulticlass
Creating testdata....
... event: 0 (2000)
... event: 1000 (2000)
======> EVENT:0
var1 = -1.14361
var2 = -0.822373
var3 = -0.395426
var4 = -0.529427
created tree: TreeS
... event: 0 (2000)
... event: 1000 (2000)
======> EVENT:0
var1 = -1.54361
var2 = -1.42237
var3 = -1.39543
var4 = -2.02943
created tree: TreeB0
... event: 0 (2000)
... event: 1000 (2000)
======> EVENT:0
var1 = -1.54361
var2 = -0.822373
var3 = -0.395426
var4 = -2.02943
created tree: TreeB1
======> EVENT:0
var1 = 0.463304
var2 = 1.37192
var3 = -1.16769
var4 = -1.77551
created tree: TreeB2
created data file: tmva_example_multiple_background.root
created tmva_example_multiple_background.root for tests of the multiclass features
DataSetInfo : [dataset] : Added class "Signal"
: Add Tree TreeS of type Signal with 2000 events
DataSetInfo : [dataset] : Added class "bg0"
: Add Tree TreeB0 of type bg0 with 2000 events
DataSetInfo : [dataset] : Added class "bg1"
: Add Tree TreeB1 of type bg1 with 2000 events
DataSetInfo : [dataset] : Added class "bg2"
: Add Tree TreeB2 of type bg2 with 2000 events
: Dataset[dataset] : Class index : 0 name : Signal
: Dataset[dataset] : Class index : 1 name : bg0
: Dataset[dataset] : Class index : 2 name : bg1
: Dataset[dataset] : Class index : 3 name : bg2
Factory : Booking method: BDTG
:
: the option *InverseBoostNegWeights* does not exist for BoostType=Grad --> change
: to new default for GradBoost *Pray*
DataSetFactory : [dataset] : Number of events in input trees
:
:
:
:
: Number of training and testing events
: ---------------------------------------------------------------------------
: Signal -- training events : 1000
: Signal -- testing events : 1000
: Signal -- training and testing events: 2000
: bg0 -- training events : 1000
: bg0 -- testing events : 1000
: bg0 -- training and testing events: 2000
: bg1 -- training events : 1000
: bg1 -- testing events : 1000
: bg1 -- training and testing events: 2000
: bg2 -- training events : 1000
: bg2 -- testing events : 1000
: bg2 -- training and testing events: 2000
:
DataSetInfo : Correlation matrix (Signal):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.380 +0.597 +0.819
: var2: +0.380 +1.000 +0.706 +0.744
: var3: +0.597 +0.706 +1.000 +0.853
: var4: +0.819 +0.744 +0.853 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (bg0):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.406 +0.621 +0.837
: var2: +0.406 +1.000 +0.696 +0.727
: var3: +0.621 +0.696 +1.000 +0.853
: var4: +0.837 +0.727 +0.853 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (bg1):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.371 +0.602 +0.831
: var2: +0.371 +1.000 +0.699 +0.721
: var3: +0.602 +0.699 +1.000 +0.847
: var4: +0.831 +0.721 +0.847 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (bg2):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 -0.660 +0.034 -0.012
: var2: -0.660 +1.000 +0.007 -0.004
: var3: +0.034 +0.007 +1.000 -0.037
: var4: -0.012 -0.004 -0.037 +1.000
: ----------------------------------------
DataSetFactory : [dataset] :
:
Factory : Booking method: MLP
:
MLP : Building Network.
: Initializing weights
Factory : Train all methods
Factory : [dataset] : Create Transformation "I" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "D" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "P" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "G" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "D" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.060705 1.0014 [ -4.0592 3.5808 ]
: var2: 0.31440 1.0501 [ -3.6891 3.7877 ]
: var3: 0.12000 1.1225 [ -3.6148 4.5640 ]
: var4: -0.070020 1.2598 [ -4.8486 5.0412 ]
: -----------------------------------------------------------
: Preparing the Decorrelation transformation...
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.096903 1.0000 [ -3.5985 2.9977 ]
: var2: 0.35671 1.0000 [ -3.3391 3.5408 ]
: var3: 0.070223 1.0000 [ -2.8950 3.1502 ]
: var4: -0.20167 1.0000 [ -3.2998 2.8753 ]
: -----------------------------------------------------------
: Preparing the Principle Component (PCA) transformation...
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 1.8262e-10 1.7916 [ -7.2781 7.8235 ]
: var2:-4.0762e-10 0.89644 [ -3.2734 2.6837 ]
: var3:-1.3316e-10 0.74817 [ -2.4103 2.7078 ]
: var4:-1.5119e-10 0.61596 [ -2.2644 1.5471 ]
: -----------------------------------------------------------
: Preparing the Gaussian transformation...
: Preparing the Decorrelation transformation...
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.0081281 1.0000 [ -2.6178 6.0982 ]
: var2: 0.010257 1.0000 [ -2.8460 6.2789 ]
: var3: 0.0095035 1.0000 [ -3.0077 5.8864 ]
: var4: 0.0074780 1.0000 [ -3.0452 5.6560 ]
: -----------------------------------------------------------
: Ranking input variables (method unspecific)...
Factory : Train method: BDTG for Multiclass classification
:
BDTG : #events: (reweighted) sig: 2000 bkg: 2000
: #events: (unweighted) sig: 1000 bkg: 3000
: Training 1000 Decision Trees ... patience please
: Elapsed time for training with 4000 events: 12 sec
: Dataset[dataset] : Create results for training
: Dataset[dataset] : Multiclass evaluation of BDTG on training sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 19.8 sec
: Creating multiclass response histograms...
: Creating xml weight file: dataset/weights/TMVAMulticlass_BDTG.weights.xml
: Creating standalone class: dataset/weights/TMVAMulticlass_BDTG.class.C
: TMVAMulticlass.root:/dataset/Method_BDTG/BDTG
Factory : Training finished
:
Factory : Train method: MLP for Multiclass classification
:
: Training Network
:
: Elapsed time for training with 4000 events: 19.6 sec
: Dataset[dataset] : Create results for training
: Dataset[dataset] : Multiclass evaluation of MLP on training sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 17.8 sec
: Creating multiclass response histograms...
: Creating xml weight file: dataset/weights/TMVAMulticlass_MLP.weights.xml
: Creating standalone class: dataset/weights/TMVAMulticlass_MLP.class.C
: Write special histos to file: TMVAMulticlass.root:/dataset/Method_MLP/MLP
Factory : Training finished
:
: Ranking input variables (method specific)...
BDTG : Ranking result (top variable is best ranked)
: --------------------------------------
: Rank : Variable : Variable Importance
: --------------------------------------
: 1 : var4 : 3.063e-01
: 2 : var1 : 2.578e-01
: 3 : var2 : 2.400e-01
: 4 : var3 : 1.959e-01
: --------------------------------------
MLP : Ranking result (top variable is best ranked)
: -----------------------------
: Rank : Variable : Importance
: -----------------------------
: 1 : var4 : 2.729e+01
: 2 : var2 : 2.104e+01
: 3 : var3 : 8.049e+00
: 4 : var1 : 6.805e+00
: -----------------------------
Factory : === Destroy and recreate all methods via weight files for testing ===
:
MLP : Building Network.
: Initializing weights
Factory : Test all methods
Factory : Test method: BDTG for Multiclass classification performance
:
: Dataset[dataset] : Create results for testing
: Dataset[dataset] : Multiclass evaluation of BDTG on testing sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 2.35 sec
: Creating multiclass response histograms...
Factory : Test method: MLP for Multiclass classification performance
:
: Dataset[dataset] : Create results for testing
: Dataset[dataset] : Multiclass evaluation of MLP on testing sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 4.05 sec
: Creating multiclass response histograms...
Factory : Evaluate all methods
: Evaluate multiclass classification method: BDTG
TFHandler_BDTG : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.057094 1.0236 [ -3.6592 3.2749 ]
: var2: 0.31579 1.0607 [ -3.6952 3.7877 ]
: var3: 0.11645 1.1227 [ -4.5727 4.5640 ]
: var4: -0.079113 1.2819 [ -4.7970 4.2221 ]
: -----------------------------------------------------------
: Dataset[dataset] : Determine optimal multiclass cuts for test data...
: Calculating best set of cuts for class Signal
FitterBase : <GeneticFitter> Optimisation, please be patient ... (inaccurate progress timing for GA)
: Elapsed time: 1.94 sec
: cutValue[Signal] = 0.636281;
: cutValue[bg0] = -0.352564;
: cutValue[bg1] = -0.216383;
: cutValue[bg2] = -0.25183;
: Calculating best set of cuts for class bg0
FitterBase : <GeneticFitter> Optimisation, please be patient ... (inaccurate progress timing for GA)
: Elapsed time: 3.89 sec
: cutValue[Signal] = 0.00864322;
: cutValue[bg0] = 0.135966;
: cutValue[bg1] = -0.565068;
: cutValue[bg2] = -0.695156;
: Calculating best set of cuts for class bg1
FitterBase : <GeneticFitter> Optimisation, please be patient ... (inaccurate progress timing for GA)
: Elapsed time: 3.01 sec
: cutValue[Signal] = -0.364226;
: cutValue[bg0] = 0.0323294;
: cutValue[bg1] = 0.28893;
: cutValue[bg2] = -0.618922;
: Calculating best set of cuts for class bg2
FitterBase : <GeneticFitter> Optimisation, please be patient ... (inaccurate progress timing for GA)
: Elapsed time: 2.67 sec
: cutValue[Signal] = -0.968535;
: cutValue[bg0] = -0.30323;
: cutValue[bg1] = -0.609151;
: cutValue[bg2] = 0.308002;
: Evaluate multiclass classification method: MLP
TFHandler_MLP : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.057094 1.0236 [ -3.6592 3.2749 ]
: var2: 0.31579 1.0607 [ -3.6952 3.7877 ]
: var3: 0.11645 1.1227 [ -4.5727 4.5640 ]
: var4: -0.079113 1.2819 [ -4.7970 4.2221 ]
: -----------------------------------------------------------
: Dataset[dataset] : Determine optimal multiclass cuts for test data...
: Calculating best set of cuts for class Signal
FitterBase : <GeneticFitter> Optimisation, please be patient ... (inaccurate progress timing for GA)
: Elapsed time: 2.4 sec
: cutValue[Signal] = 0.258811;
: cutValue[bg0] = -0.363876;
: cutValue[bg1] = -0.881824;
: cutValue[bg2] = -0.296778;
: Calculating best set of cuts for class bg0
FitterBase : <GeneticFitter> Optimisation, please be patient ... (inaccurate progress timing for GA)
: Elapsed time: 3.7 sec
: cutValue[Signal] = -0.30632;
: cutValue[bg0] = 0.259599;
: cutValue[bg1] = -0.354431;
: cutValue[bg2] = 0.145191;
: Calculating best set of cuts for class bg1
FitterBase : <GeneticFitter> Optimisation, please be patient ... (inaccurate progress timing for GA)
: Elapsed time: 2.76 sec
: cutValue[Signal] = -0.223345;
: cutValue[bg0] = -0.297497;
: cutValue[bg1] = 0.296876;
: cutValue[bg2] = -0.898552;
: Calculating best set of cuts for class bg2
FitterBase : <GeneticFitter> Optimisation, please be patient ... (inaccurate progress timing for GA)
: Elapsed time: 2.55 sec
: cutValue[Signal] = -0.370779;
: cutValue[bg0] = -0.330154;
: cutValue[bg1] = -0.583391;
: cutValue[bg2] = 0.271479;
:
: Evaluation results ranked by best signal efficiency times signal purity
: -------------------------------------------------------------------------------------------------------
: DataSet Name MVA Method Signal bg0 bg1 bg2
: -------------------------------------------------------------------------------------------------------
: [dataset ] BDTG 0.696 0.529 0.576 0.670
: [dataset ] MLP 0.740 0.595 0.667 0.707
: -------------------------------------------------------------------------------------------------------
:
: DataSet Name MVA Method Signal bg0 bg1 bg2
: -------------------------------------------------------------------------------------------------------
: [dataset ] BDTG 0.696 0.529 0.576 0.670
: [dataset ] MLP 0.740 0.595 0.667 0.707
: -------------------------------------------------------------------------------------------------------
:
Dataset:dataset : Created tree 'TestTree' with 4000 events
:
Dataset:dataset : Created tree 'TrainTree' with 4000 events
:
Factory : Thank you for using TMVA!
: For citation information, please visit: http://tmva.sf.net/citeTMVA.html
==> Wrote root file: TMVAMulticlass.root
==> TMVAClassification is done!
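When ROOT runs in batch mode the multiclass GUI is not launched at the end of the macro, but the response and evaluation histograms stored in TMVAMulticlass.root can still be browsed later. A minimal sketch, mirroring the GUI call made by the macro itself (the header name TMVA/TMVAMultiClassGui.h and the single-filename call are assumptions based on that call):

#include "TMVA/TMVAMultiClassGui.h"
using namespace TMVA;

// Re-open the dedicated multiclass GUI on the output file written by the training run above.
void browseMulticlass()
{
   TMVAMultiClassGui("TMVAMulticlass.root");
}

The full source of the macro follows.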
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>

#include "TFile.h"
#include "TTree.h"
#include "TString.h"
#include "TSystem.h"
#include "TROOT.h"

#include "TMVA/Tools.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/TMVAMultiClassGui.h"

using namespace TMVA;

void TMVAMulticlass( TString myMethodList = "" )
{
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   //
   //     TString tmva_dir(TString(gRootDir) + "/tmva");
   //     if(gSystem->Getenv("TMVASYS"))
   //        tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   //     gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   //     gROOT->ProcessLine(".L TMVAMultiClassGui.C");

   //---------------------------------------------------------------
   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;
   Use["MLP"]     = 1;
   Use["BDTG"]    = 1;
   Use["DNN"]     = 0;
   Use["FDA_GA"]  = 0;
   Use["PDEFoam"] = 0;
   //---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAMulticlass" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new root output file.
   TString outfileName = "TMVAMulticlass.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );

   TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset");

   dataloader->AddVariable( "var1", 'F' );
   dataloader->AddVariable( "var2", "Variable 2", "", 'F' );
   dataloader->AddVariable( "var3", "Variable 3", "units", 'F' );
   dataloader->AddVariable( "var4", "Variable 4", "units", 'F' );

   TFile *input(0);
   TString fname = "./tmva_example_multiple_background.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find the file in the local directory
      std::cout << "--- TMVAMulticlass : Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   else {
      std::cout << "Creating testdata...." << std::endl;
      TString createDataMacro = TString(gROOT->GetTutorialsDir()) + "/tmva/createData.C";
      gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data()));
      gROOT->ProcessLine("create_MultipleBackground(2000)");
      std::cout << " created tmva_example_multiple_background.root for tests of the multiclass features"<<std::endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }

   TTree *signalTree  = (TTree*)input->Get("TreeS");
   TTree *background0 = (TTree*)input->Get("TreeB0");
   TTree *background1 = (TTree*)input->Get("TreeB1");
   TTree *background2 = (TTree*)input->Get("TreeB2");

   gROOT->cd( outfileName+TString(":/") );
   dataloader->AddTree( signalTree,  "Signal" );
   dataloader->AddTree( background0, "bg0" );
   dataloader->AddTree( background1, "bg1" );
   dataloader->AddTree( background2, "bg2" );

   dataloader->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );

   if (Use["BDTG"]) // gradient boosted decision trees
      factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.50:nCuts=20:MaxDepth=2");
   if (Use["MLP"]) // neural network
      factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
   if (Use["FDA_GA"]) // functional discriminant with GA minimizer
      factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
   if (Use["PDEFoam"]) // PDE-Foam approach
      factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["DNN"]) {
      TString layoutString ("Layout=TANH|100,TANH|50,TANH|10,LINEAR");
      TString training0 ("LearningRate=1e-1, Momentum=0.5, Repetitions=1, ConvergenceSteps=10,"
                         " BatchSize=256, TestRepetitions=10, Multithreading=True");
      TString training1 ("LearningRate=1e-2, Momentum=0.0, Repetitions=1, ConvergenceSteps=10,"
                         " BatchSize=256, TestRepetitions=7, Multithreading=True");
      TString trainingStrategyString ("TrainingStrategy=");
      trainingStrategyString += training0 + "|" + training1;
      TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
                         "WeightInitialization=XAVIERUNIFORM:Architecture=STANDARD");
      nnOptions.Append (":"); nnOptions.Append (layoutString);
      nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);
      factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN", nnOptions );
   }

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;
   delete dataloader;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAMultiClassGui( outfileName );
}

int main( int argc, char** argv )
{
   // Select methods (don't look at this code - not of interest)
   TString methodList;
   for (int i=1; i<argc; i++) {
      TString regMethod(argv[i]);
      if (regMethod=="-b" || regMethod=="--batch") continue;
      if (!methodList.IsNull()) methodList += TString(",");
      methodList += regMethod;
   }
   TMVAMulticlass(methodList);
   return 0;
}
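Once the weight files under dataset/weights/ exist, the trained classifiers can be applied to new events outside of the Factory. This is not done in this macro; the following is a minimal sketch of the usual application pattern with TMVA::Reader, assuming the BDTG weight file written above and the same four input variables (the dummy event values are purely illustrative):

#include <vector>
#include <iostream>
#include "TMVA/Reader.h"

void applyMulticlassBDTG()
{
   // The Reader needs the same variables, in the same order, as used for training.
   Float_t var1, var2, var3, var4;
   TMVA::Reader reader("!Color:!Silent");
   reader.AddVariable("var1", &var1);
   reader.AddVariable("var2", &var2);
   reader.AddVariable("var3", &var3);
   reader.AddVariable("var4", &var4);

   // Book the method from the weight file produced by TMVAMulticlass.C.
   reader.BookMVA("BDTG", "dataset/weights/TMVAMulticlass_BDTG.weights.xml");

   // Fill the variables for one (dummy) event and evaluate.
   var1 = 0.5; var2 = -0.2; var3 = 1.1; var4 = 0.3;

   // EvaluateMulticlass returns one response per class, here assumed to be in the
   // order the classes were registered: Signal, bg0, bg1, bg2.
   const std::vector<Float_t>& response = reader.EvaluateMulticlass("BDTG");
   for (size_t i = 0; i < response.size(); ++i)
      std::cout << "class " << i << " response: " << response[i] << std::endl;
}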
Author
Andreas Hoecker

Definition in file TMVAMulticlass.C.