TFile* TMVA::Factory::fgTargetFile = 0;

#define RECREATE_METHODS kTRUE
: Configurable        ( theOption ),
  fDataSetManager     ( NULL ),
  fDataInputHandler   ( new DataInputHandler ),
  fTransformations    ( "I" ),
  fJobName            ( jobName ),
  fDataAssignType     ( kAssignEvents ),
  fATreeEvent         ( NULL ),
  fAnalysisType       ( Types::kClassification )
fgTargetFile = theTargetFile;

fDataSetManager = new DataSetManager( *fDataInputHandler );

if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput();

SetConfigDescription( "Configuration options for Factory running" );
SetConfigName( GetName() );
DeclareOptionRef( fVerbose, "V", "Verbose flag" );
DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
DeclareOptionRef( fTransformations, "Transformations",
                  "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
DeclareOptionRef( silent, "Silent",
                  "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
DeclareOptionRef( drawProgressBar, "DrawProgressBar",
                  "Draw progress bar to display training, testing and evaluation schedule (default: True)" );

DeclareOptionRef( analysisType, "AnalysisType",
                  "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
AddPreDefVal( TString("Classification") );
AddPreDefVal( TString("Regression") );
AddPreDefVal( TString("Multiclass") );

CheckForUnusedOptions();
if      (analysisType == "classification") fAnalysisType = Types::kClassification;
else if (analysisType == "regression"    ) fAnalysisType = Types::kRegression;
else if (analysisType == "multiclass"    ) fAnalysisType = Types::kMulticlass;
else if (analysisType == "auto"          ) fAnalysisType = Types::kNoAnalysisType;
std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
for (; trfIt != fDefaultTrfs.end(); trfIt++) delete (*trfIt);

this->DeleteAllMethods();
delete fDataInputHandler;

delete fDataSetManager;
MVector::iterator itrMethod = fMethods.begin();
for (; itrMethod != fMethods.end(); itrMethod++) {
   Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
return fDataSetManager->AddDataSetInfo(dsi);

DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName);

if (dsi != 0) return *dsi;

return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName)));
assignTree->Branch( "type",   &fATreeType,   "ATreeType/I" );
assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );

std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();

if (!fATreeEvent) fATreeEvent = new Float_t[vars.size()+tgts.size()+spec.size()];

for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
   TString vname = vars[ivar].GetExpression();
   assignTree->Branch( vname, &(fATreeEvent[ivar]), vname + "/F" );
}

for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
   TString vname = tgts[itgt].GetExpression();
   assignTree->Branch( vname, &(fATreeEvent[vars.size()+itgt]), vname + "/F" );
}

for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
   TString vname = spec[ispc].GetExpression();
   assignTree->Branch( vname, &(fATreeEvent[vars.size()+tgts.size()+ispc]), vname + "/F" );
}
const std::vector<Double_t>& event, Double_t weight )

ClassInfo* theClass = DefaultDataSetInfo().AddClass(className);

if (clIndex >= fTrainAssignTree.size()) {
   fTrainAssignTree.resize(clIndex+1, 0);
   fTestAssignTree .resize(clIndex+1, 0);
}

if (fTrainAssignTree[clIndex]==0) {
   fTrainAssignTree[clIndex] = CreateEventAssignTrees( Form("TrainAssignTree_%s", className.Data()) );
   fTestAssignTree [clIndex] = CreateEventAssignTrees( Form("TestAssignTree_%s",  className.Data()) );
}

fATreeType   = clIndex;
fATreeWeight = weight;
for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];

else fTestAssignTree[clIndex]->Fill();

return fTrainAssignTree[clIndex] != 0;
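// Usage sketch for the event-wise assignment implemented above, assuming two
// declared variables; the values and weights are illustrative. The vector
// must hold variables + targets + spectators in declaration order.
static void ExampleAssignEvents( TMVA::Factory& factory )   // hypothetical helper
{
   std::vector<Double_t> event( 2 );
   event[0] = 1.2;                                  // first declared variable
   event[1] = -0.7;                                 // second declared variable
   factory.AddSignalTrainingEvent( event, 0.8 );    // fills TrainAssignTree_Signal
   factory.AddBackgroundTestEvent( event, 1.0 );    // fills TestAssignTree_Background
}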
UInt_t size = fTrainAssignTree.size();
for (UInt_t i=0; i<size; i++) {
   if (!UserAssignEvents(i)) continue;
   const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
   SetWeightExpression( "weight", className );
Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
      << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;

AddTree( tree, className, weight, cut, tt );
Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;

DefaultDataSetInfo().AddClass( className );

Log() << kINFO << "Add Tree " << tree->GetName() << " of type " << className

DataInput().AddTree( tree, className, weight, cut, tt );
AddTree( signal, "Signal", weight, TCut(""), treetype );

TTree* signalTree = new TTree( "TreeS", "Tree (S)" );

Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file    : \""

AddTree( signalTree, "Signal", weight, TCut(""), treetype );

AddTree( signal, "Signal", weight, TCut(""), treetype );

AddTree( signal, "Background", weight, TCut(""), treetype );

TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );

Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file: \""

AddTree( bkgTree, "Background", weight, TCut(""), treetype );
AddTree( signal, "Background", weight, TCut(""), treetype );

AddTree( tree, "Signal", weight );

AddTree( tree, "Background", weight );

DataInput().AddTree( datFileS, "Signal",     signalWeight );
DataInput().AddTree( datFileB, "Background", backgroundWeight );
DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );

DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );

DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );

DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
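// Usage sketch for declaring the data-set layout handled above; branch names,
// titles, units and types are illustrative assumptions.
static void ExampleDeclareLayout( TMVA::Factory& factory )   // hypothetical helper
{
   factory.AddVariable ( "var1", "Variable 1", "GeV", 'F' );
   factory.AddVariable ( "var2", "Variable 2", "",    'I' );
   factory.AddSpectator( "eventID", "Event ID", "" );
   // regression only:
   // factory.AddTarget( "trueEnergy", "True energy", "GeV" );
}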
return AddDataSet( "Default" );

for (std::vector<TString>::iterator it=theVariables->begin();
     it!=theVariables->end(); it++) AddVariable(*it);

DefaultDataSetInfo().SetWeightExpression( variable, "Signal" );

DefaultDataSetInfo().SetWeightExpression( variable, "Background" );

SetSignalWeightExpression( variable );
SetBackgroundWeightExpression( variable );

else DefaultDataSetInfo().SetWeightExpression( variable, className );

SetCut( TCut(cut), className );

DefaultDataSetInfo().SetCut( cut, className );

AddCut( TCut(cut), className );

DefaultDataSetInfo().AddCut( cut, className );
SetInputTreesFromEventAssignTrees();

DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
                                           NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()) );

SetInputTreesFromEventAssignTrees();

DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
                                           Ntrain, Ntrain, Ntest, Ntest) );

SetInputTreesFromEventAssignTrees();

DefaultDataSetInfo().PrintClasses();

DefaultDataSetInfo().SetSplitOptions( opt );

SetInputTreesFromEventAssignTrees();

Log() << kINFO << "Preparing trees for training and testing..." << Endl;
AddCut( sigcut, "Signal" );
AddCut( bkgcut, "Background" );

DefaultDataSetInfo().SetSplitOptions( splitOpt );
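// Usage sketch for the splitting step handled above, assuming no additional
// preselection (empty TCut); the event counts and split mode are illustrative.
static void ExamplePrepareSplit( TMVA::Factory& factory )   // hypothetical helper
{
   factory.PrepareTrainingAndTestTree( TCut(""),
      "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V" );
}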
if (DefaultDataSetInfo().GetNClasses() == 2
    && DefaultDataSetInfo().GetClassInfo( "Signal" )     != NULL
    && DefaultDataSetInfo().GetClassInfo( "Background" ) != NULL
   ) {
}
else if (DefaultDataSetInfo().GetNClasses() >= 2) {

Log() << kFATAL << "No analysis type for " << DefaultDataSetInfo().GetNClasses() << " classes and "
      << DefaultDataSetInfo().GetNTargets() << " regression targets." << Endl;
Log() << kFATAL << "Booking failed since method with title <"
      << methodTitle << "> already exists"

"Number of times the classifier will be boosted" );
DefaultDataSetInfo(),

Log() << "Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;

DefaultDataSetInfo(),

Log() << kFATAL << "Method with type kBoost cannot be cast to MethodCategory. /Factory" << Endl;

if (method == 0) return 0;

Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;
DefaultDataSetInfo().GetNClasses(),
DefaultDataSetInfo().GetNTargets() )) {

Log() << "regression with " << DefaultDataSetInfo().GetNTargets() << " targets." << Endl;

Log() << "multiclass classification with " << DefaultDataSetInfo().GetNClasses() << " classes." << Endl;

Log() << "classification with " << DefaultDataSetInfo().GetNClasses() << " classes." << Endl;

fMethods.push_back( method );
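// Usage sketch for the booking machinery above, taking a BDT as an example;
// the title and option string are illustrative (each method defines its own
// options).
static void ExampleBookMethod( TMVA::Factory& factory )   // hypothetical helper
{
   factory.BookMethod( TMVA::Types::kBDT, "BDT",
                       "!H:!V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex" );
}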
MVector::const_iterator itrMethod    = fMethods.begin();
MVector::const_iterator itrMethodEnd = fMethods.end();

for (; itrMethod != itrMethodEnd; itrMethod++) {

DefaultDataSetInfo().GetDataSet();
for (UInt_t cls = 0; cls < DefaultDataSetInfo().GetNClasses(); cls++) {
   m = DefaultDataSetInfo().CorrelationMatrix( DefaultDataSetInfo().GetClassInfo(cls)->GetName() );
   h = DefaultDataSetInfo().CreateCorrelationMatrixHist( m, TString("CorrelationMatrix") + DefaultDataSetInfo().GetClassInfo(cls)->GetName(),
                                                         "Correlation Matrix (" + DefaultDataSetInfo().GetClassInfo(cls)->GetName() + TString(")") );

m = DefaultDataSetInfo().CorrelationMatrix( "Signal" );
h = DefaultDataSetInfo().CreateCorrelationMatrixHist( m, "CorrelationMatrixS", "Correlation Matrix (signal)" );

m = DefaultDataSetInfo().CorrelationMatrix( "Background" );
h = DefaultDataSetInfo().CreateCorrelationMatrixHist( m, "CorrelationMatrixB", "Correlation Matrix (background)" );

m = DefaultDataSetInfo().CorrelationMatrix( "Regression" );
h = DefaultDataSetInfo().CreateCorrelationMatrixHist( m, "CorrelationMatrix", "Correlation Matrix" );
processTrfs = fTransformations;

std::vector<TMVA::TransformationHandler*> trfs;

std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
for (; trfsDefIt != trfsDef.end(); trfsDefIt++) {

   Log() << kINFO << "current transformation string: '" << trfS.Data() << "'" << Endl;

   DefaultDataSetInfo(),

   if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();

const std::vector<Event*>& inputEvents = DefaultDataSetInfo().GetDataSet()->GetEventCollection();

std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();

for (; trfIt != trfs.end(); trfIt++) {
   (*trfIt)->SetRootDir(RootBaseDir());
   (*trfIt)->CalcTransformations(inputEvents);

for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++) delete *trfIt;
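// Sketch of the transformation definition consumed by this loop, as it would
// appear in the constructor option string; the particular chain is an
// illustrative assumption. Each ';'-separated token becomes one entry in
// trfs; a ','-separated token such as "G,D" chains Gaussianisation followed
// by decorrelation.
static const char* const exampleTrafoOption =   // hypothetical name, for illustration
   "Transformations=I;D;P;G,D";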
MVector::iterator itrMethod;

for (itrMethod = fMethods.begin(); itrMethod != fMethods.end(); ++itrMethod) {

   Log() << kFATAL << "Dynamic cast to MethodBase failed" << Endl;

   << " not trained (training tree has less entries ["
   << "] than required [" << MinNoTrainingEvents << "]" << Endl;
Log() << kINFO << "Optimization of tuning parameters finished for Method: " << mva->GetName() << Endl;
if (fDataInputHandler->GetEntries() <= 1) {
   Log() << kFATAL << "No input data for the training provided!" << Endl;

Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;

   && DefaultDataSetInfo().GetNClasses() < 2 )
   Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
WriteDataInformation();

if (fMethods.empty()) {

Log() << kINFO << "Train all methods for "

MVector::iterator itrMethod;

for (itrMethod = fMethods.begin(); itrMethod != fMethods.end(); ++itrMethod) {

   << " not trained (training tree has less entries ["
   << "] than required [" << MinNoTrainingEvents << "]" << Endl;
Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
for (itrMethod = fMethods.begin(); itrMethod != fMethods.end(); itrMethod++) {

   const Ranking* ranking = (*itrMethod)->CreateRanking();
   if (ranking != 0) ranking->Print();
   else Log() << kINFO << "No variable ranking supplied by classifier: "
Log() << kINFO << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;

RootBaseDir()->cd();

for (UInt_t i=0; i<fMethods.size(); i++) {

   dataSetInfo, weightfile ) );

if (!methCat) Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;
if (fMethods.empty()) {

MVector::iterator itrMethod    = fMethods.begin();
MVector::iterator itrMethodEnd = fMethods.end();
for (; itrMethod != itrMethodEnd; itrMethod++) {

   if (mva == 0) continue;

   (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << " performance" << Endl;
if (methodTitle != "") {

   Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
         << "\" in list" << Endl;

MVector::const_iterator itrMethod    = fMethods.begin();
MVector::const_iterator itrMethodEnd = fMethods.end();
for (; itrMethod != itrMethodEnd; itrMethod++) {

   if (method == 0) continue;

if (methodTitle != "") {

   Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
         << "\" in list" << Endl;

MVector::const_iterator itrMethod    = fMethods.begin();
MVector::const_iterator itrMethodEnd = fMethods.end();
for (; itrMethod != itrMethodEnd; itrMethod++) {

   if (method == 0) continue;
Log() << kINFO << "Evaluating all variables..." << Endl;

for (UInt_t i=0; i<DefaultDataSetInfo().GetNVariables(); i++) {
   TString s = DefaultDataSetInfo().GetVariableInfo(i).GetLabel();
   if (options.Contains("V")) s += ":V";
   this->BookMethod( "Variable", s );
if (fMethods.empty()) {
   Log() << kINFO << "...nothing found to evaluate" << Endl;

Int_t nmeth_used[2] = {0,0};

std::vector<std::vector<TString> >  mname(2);
std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);

std::vector<std::vector<Float_t> > multiclass_testEff;
std::vector<std::vector<Float_t> > multiclass_trainEff;
std::vector<std::vector<Float_t> > multiclass_testPur;
std::vector<std::vector<Float_t> > multiclass_trainPur;

std::vector<std::vector<Double_t> > biastrain(1);
std::vector<std::vector<Double_t> > biastest(1);
std::vector<std::vector<Double_t> > devtrain(1);
std::vector<std::vector<Double_t> > devtest(1);
std::vector<std::vector<Double_t> > rmstrain(1);
std::vector<std::vector<Double_t> > rmstest(1);
std::vector<std::vector<Double_t> > minftrain(1);
std::vector<std::vector<Double_t> > minftest(1);
std::vector<std::vector<Double_t> > rhotrain(1);
std::vector<std::vector<Double_t> > rhotest(1);

std::vector<std::vector<Double_t> > biastrainT(1);
std::vector<std::vector<Double_t> > biastestT(1);
std::vector<std::vector<Double_t> > devtrainT(1);
std::vector<std::vector<Double_t> > devtestT(1);
std::vector<std::vector<Double_t> > rmstrainT(1);
std::vector<std::vector<Double_t> > rmstestT(1);
std::vector<std::vector<Double_t> > minftrainT(1);
std::vector<std::vector<Double_t> > minftestT(1);

MVector::iterator itrMethod    = fMethods.begin();
MVector::iterator itrMethodEnd = fMethods.end();
for (; itrMethod != itrMethodEnd; itrMethod++) {

   if (theMethod == 0) continue;

   doRegression = kTRUE;
biastest[0]  .push_back( bias );
devtest[0]   .push_back( dev );
rmstest[0]   .push_back( rms );
minftest[0]  .push_back( mInf );
rhotest[0]   .push_back( rho );
biastestT[0] .push_back( biasT );
devtestT[0]  .push_back( devT );
rmstestT[0]  .push_back( rmsT );
minftestT[0] .push_back( mInfT );

biastrain[0] .push_back( bias );
devtrain[0]  .push_back( dev );
rmstrain[0]  .push_back( rms );
minftrain[0] .push_back( mInf );
rhotrain[0]  .push_back( rho );
biastrainT[0].push_back( biasT );
devtrainT[0] .push_back( devT );
rmstrainT[0] .push_back( rmsT );
minftrainT[0].push_back( mInfT );
Log() << kINFO << "Write evaluation histograms to file" << Endl;

doMulticlass = kTRUE;

Log() << kINFO << "Write evaluation histograms to file" << Endl;

eff01err[isel].push_back( err );
eff10err[isel].push_back( err );
eff30err[isel].push_back( err );

Log() << kINFO << "Write evaluation histograms to file" << Endl;
std::vector<TString> vtemps = mname[0];
std::vector< std::vector<Double_t> > vtmp;
vtmp.push_back( devtest[0]   );
vtmp.push_back( devtrain[0]  );
vtmp.push_back( biastest[0]  );
vtmp.push_back( biastrain[0] );
vtmp.push_back( rmstest[0]   );
vtmp.push_back( rmstrain[0]  );
vtmp.push_back( minftest[0]  );
vtmp.push_back( minftrain[0] );
vtmp.push_back( rhotest[0]   );
vtmp.push_back( rhotrain[0]  );
vtmp.push_back( devtestT[0]  );
vtmp.push_back( devtrainT[0] );
vtmp.push_back( biastestT[0] );
vtmp.push_back( biastrainT[0]);
vtmp.push_back( rmstestT[0]  );
vtmp.push_back( rmstrainT[0] );
vtmp.push_back( minftestT[0] );
vtmp.push_back( minftrainT[0]);

devtest[0]    = vtmp[0];
devtrain[0]   = vtmp[1];
biastest[0]   = vtmp[2];
biastrain[0]  = vtmp[3];
rmstest[0]    = vtmp[4];
rmstrain[0]   = vtmp[5];
minftest[0]   = vtmp[6];
minftrain[0]  = vtmp[7];
rhotest[0]    = vtmp[8];
rhotrain[0]   = vtmp[9];
devtestT[0]   = vtmp[10];
devtrainT[0]  = vtmp[11];
biastestT[0]  = vtmp[12];
biastrainT[0] = vtmp[13];
rmstestT[0]   = vtmp[14];
rmstrainT[0]  = vtmp[15];
minftestT[0]  = vtmp[16];
minftrainT[0] = vtmp[17];
else if (doMulticlass) {

for (Int_t k=0; k<2; k++) {
   std::vector< std::vector<Double_t> > vtemp;
   vtemp.push_back( effArea[k]    );
   vtemp.push_back( eff10[k]      );
   vtemp.push_back( eff01[k]      );
   vtemp.push_back( eff30[k]      );
   vtemp.push_back( eff10err[k]   );
   vtemp.push_back( eff01err[k]   );
   vtemp.push_back( eff30err[k]   );
   vtemp.push_back( trainEff10[k] );
   vtemp.push_back( trainEff01[k] );
   vtemp.push_back( trainEff30[k] );
   vtemp.push_back( sig[k]        );
   vtemp.push_back( sep[k]        );
   vtemp.push_back( roc[k]        );
   std::vector<TString> vtemps = mname[k];

   effArea[k]    = vtemp[0];
   eff10[k]      = vtemp[1];
   eff01[k]      = vtemp[2];
   eff30[k]      = vtemp[3];
   eff10err[k]   = vtemp[4];
   eff01err[k]   = vtemp[5];
   eff30err[k]   = vtemp[6];
   trainEff10[k] = vtemp[7];
   trainEff01[k] = vtemp[8];
   trainEff30[k] = vtemp[9];
const Int_t nmeth = methodsNoCuts.size();
const Int_t nvar  = DefaultDataSetInfo().GetNVariables();

if (!doRegression && !doMulticlass) {

std::vector<Double_t> rvec;

std::vector<TString>* theVars = new std::vector<TString>;
std::vector<ResultsClassification*> mvaRes;
for (itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {

   theVars->back().ReplaceAll( "MVA_", "" );

DataSet* defDs = DefaultDataSetInfo().GetDataSet();
for (Int_t im=0; im<nmeth; im++) {

   Log() << kWARNING << "Found NaN return value in event: " << ievt
         << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;

   else dvec[im] = retval;

if (DefaultDataSetInfo().IsSignal(ev)) { tpSig->AddRow( dvec ); theMat = overlapS; }
else                                   { tpBkg->AddRow( dvec ); theMat = overlapB; }

for (Int_t im=0; im<nmeth; im++) {
   for (Int_t jm=im; jm<nmeth; jm++) {
      if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
         if (im != jm) (*theMat)(jm,im)++;
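// Worked illustration of the counting rule above, with hypothetical numbers:
// entry (i,j) of the overlap matrix is incremented whenever methods i and j
// fall on the same side of their signal reference cuts, i.e. when
// (d_i - r_i)*(d_j - r_j) > 0 with d = MVA response and r = reference cut;
// normalised by the event count this gives the fraction of events on which
// the two MVAs agree about "signal-likeness".
static Bool_t ExampleConformAnswer()   // hypothetical helper
{
   Double_t d_i = 0.8, r_i = 0.5;   // method i: above its cut, signal-like
   Double_t d_j = 0.3, r_j = 0.4;   // method j: below its cut, background-like
   return (d_i - r_i)*(d_j - r_j) > 0;   // kFALSE here: the methods disagree
}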
if (corrMatS != 0 && corrMatB != 0) {

for (Int_t im=0; im<nmeth; im++) {
   for (Int_t jm=0; jm<nmeth; jm++) {
      mvaMatS(im,jm) = (*corrMatS)(im,jm);
      mvaMatB(im,jm) = (*corrMatB)(im,jm);

std::vector<TString> theInputVars;

for (Int_t iv=0; iv<nvar; iv++) {
   theInputVars.push_back( DefaultDataSetInfo().GetVariableInfo( iv ).GetLabel() );
   for (Int_t jm=0; jm<nmeth; jm++) {
      varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
      varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
Log() << kINFO << "Inter-MVA correlation matrix (signal):" << Endl;

Log() << kINFO << "Inter-MVA correlation matrix (background):" << Endl;

Log() << kINFO << "Correlations between input variables and MVA response (signal):" << Endl;

Log() << kINFO << "Correlations between input variables and MVA response (background):" << Endl;

else Log() << kWARNING << "<TestAllMethods> cannot compute correlation matrices" << Endl;

Log() << kINFO << "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
Log() << kINFO << "the MVAs 'i' and 'j' have returned consistent answers about \"signal-likeness\"" << Endl;
Log() << kINFO << "An event is signal-like, if its MVA output exceeds the following value:" << Endl;
Log() << kINFO << "which corresponds to the working point: eff(signal) = 1 - eff(background)" << Endl;

if (nmeth != (Int_t)fMethods.size())
   Log() << kINFO << "Note: no correlations and overlap with cut method are provided at present" << Endl;

Log() << kINFO << "Inter-MVA overlap matrix (signal):" << Endl;

Log() << kINFO << "Inter-MVA overlap matrix (background):" << Endl;
TString hLine = "-------------------------------------------------------------------------";

Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
Log() << kINFO << "(\"Bias\" quotes the mean deviation of the regression from true target." << Endl;
Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;

Log() << kINFO << "MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
for (Int_t i=0; i<nmeth_used[0]; i++) {
   Log() << kINFO << Form( "%-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
                           (const char*)mname[0][i],
                           biastest[0][i], biastestT[0][i],
                           rmstest[0][i], rmstestT[0][i],
                           minftest[0][i], minftestT[0][i] )

Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;

Log() << kINFO << "MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;

for (Int_t i=0; i<nmeth_used[0]; i++) {
   Log() << kINFO << Form( "%-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
                           (const char*)mname[0][i],
                           biastrain[0][i], biastrainT[0][i],
                           rmstrain[0][i], rmstrainT[0][i],
                           minftrain[0][i], minftrainT[0][i] )
else if (doMulticlass) {

TString hLine = "--------------------------------------------------------------------------------";

Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;

TString header = "MVA Method ";
for (UInt_t icls = 0; icls<DefaultDataSetInfo().GetNClasses(); ++icls) {
   header += Form( "%-12s ", DefaultDataSetInfo().GetClassInfo(icls)->GetName().Data() );

for (Int_t i=0; i<nmeth_used[0]; i++) {
   TString res = Form( "%-15s", (const char*)mname[0][i] );
   for (UInt_t icls = 0; icls<DefaultDataSetInfo().GetNClasses(); ++icls) {
      res += Form( "%#1.3f ", (multiclass_testEff[i][icls])*(multiclass_testPur[i][icls]) );
TString hLine = "--------------------------------------------------------------------------------";

Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;

Log() << kINFO << "MVA Signal efficiency at bkg eff.(error): | Sepa- Signifi- " << Endl;
Log() << kINFO << "Method: @B=0.01 @B=0.10 @B=0.30 ROC-integ. | ration: cance: " << Endl;
for (Int_t k=0; k<2; k++) {
   if (k == 1 && nmeth_used[k] > 0) {
      Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
   }
   for (Int_t i=0; i<nmeth_used[k]; i++) {
      if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
      if (sep[k][i] < 0 || sig[k][i] < 0) {

         Log() << kINFO << Form( "%-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f | -- --",
                                 (const char*)mname[k][i],
                                 eff01[k][i], Int_t(1000*eff01err[k][i]),
                                 eff10[k][i], Int_t(1000*eff10err[k][i]),
                                 eff30[k][i], Int_t(1000*eff30err[k][i]),
                                 effArea[k][i]) << Endl;

      Log() << kINFO << Form( "%-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f | %#1.3f %#1.3f",
                              (const char*)mname[k][i],
                              eff01[k][i], Int_t(1000*eff01err[k][i]),
                              eff10[k][i], Int_t(1000*eff10err[k][i]),
                              eff30[k][i], Int_t(1000*eff30err[k][i]),
                              sep[k][i], sig[k][i]) << Endl;
Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;

Log() << kINFO << "MVA Signal efficiency: from test sample (from training sample) " << Endl;
Log() << kINFO << "Method: @B=0.01 @B=0.10 @B=0.30 " << Endl;

for (Int_t k=0; k<2; k++) {
   if (k == 1 && nmeth_used[k] > 0) {
      Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
   }
   for (Int_t i=0; i<nmeth_used[k]; i++) {
      if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
      Log() << kINFO << Form( "%-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
                              (const char*)mname[k][i],
                              eff01[k][i], trainEff01[k][i],
                              eff10[k][i], trainEff10[k][i],
                              eff30[k][i], trainEff30[k][i]) << Endl;
RootBaseDir()->cd();