134 fFitMethod ( kUseGeneticAlgorithm ),
135 fEffMethod ( kUseEventSelection ),
160 fVarHistS_smooth( 0 ),
161 fVarHistB_smooth( 0 ),
174 fFitMethod ( kUseGeneticAlgorithm ),
175 fEffMethod ( kUseEventSelection ),
200 fVarHistS_smooth( 0 ),
201 fVarHistB_smooth( 0 ),
222 fVarHistS = fVarHistB = 0;
223 fVarHistS_smooth = fVarHistB_smooth = 0;
224 fVarPdfS = fVarPdfB = 0;
226 fBinaryTreeS = fBinaryTreeB = 0;
232 fRangeSign =
new std::vector<Int_t> ( GetNvar() );
235 fMeanS =
new std::vector<Double_t>( GetNvar() );
236 fMeanB =
new std::vector<Double_t>( GetNvar() );
237 fRmsS =
new std::vector<Double_t>( GetNvar() );
238 fRmsB =
new std::vector<Double_t>( GetNvar() );
241 fFitParams =
new std::vector<EFitParameters>( GetNvar() );
244 fFitMethod = kUseMonteCarlo;
250 for (
UInt_t i=0; i<GetNvar(); i++) {
263 fTmpCutMin =
new Double_t[GetNvar()];
264 fTmpCutMax =
new Double_t[GetNvar()];
278 delete fEffBvsSLocal;
280 if (
NULL != fCutRangeMin)
delete [] fCutRangeMin;
281 if (
NULL != fCutRangeMax)
delete [] fCutRangeMax;
282 if (
NULL != fAllVarsI)
delete [] fAllVarsI;
284 for (
UInt_t i=0;i<GetNvar();i++) {
285 if (
NULL != fCutMin[i] )
delete [] fCutMin[i];
286 if (
NULL != fCutMax[i] )
delete [] fCutMax[i];
287 if (
NULL != fCutRange[i])
delete fCutRange[i];
290 if (
NULL != fCutMin)
delete [] fCutMin;
291 if (
NULL != fCutMax)
delete [] fCutMax;
293 if (
NULL != fTmpCutMin)
delete [] fTmpCutMin;
294 if (
NULL != fTmpCutMax)
delete [] fTmpCutMax;
296 if (
NULL != fBinaryTreeS)
delete fBinaryTreeS;
297 if (
NULL != fBinaryTreeB)
delete fBinaryTreeB;
321 DeclareOptionRef(fFitMethodS =
"GA",
"FitMethod",
"Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
325 AddPreDefVal(
TString(
"MCEvents"));
326 AddPreDefVal(
TString(
"MINUIT"));
327 AddPreDefVal(
TString(
"EventScan"));
330 DeclareOptionRef(fEffMethodS =
"EffSel",
"EffMethod",
"Selection Method");
331 AddPreDefVal(
TString(
"EffSel"));
332 AddPreDefVal(
TString(
"EffPDF"));
335 fCutRange.resize(GetNvar());
336 fCutRangeMin =
new Double_t[GetNvar()];
337 fCutRangeMax =
new Double_t[GetNvar()];
340 fCutRangeMin[
ivar] = fCutRangeMax[
ivar] = -1;
343 DeclareOptionRef( fCutRangeMin, GetNvar(),
"CutRangeMin",
"Minimum of allowed cut range (set per variable)" );
344 DeclareOptionRef( fCutRangeMax, GetNvar(),
"CutRangeMax",
"Maximum of allowed cut range (set per variable)" );
346 fAllVarsI =
new TString[GetNvar()];
348 for (
UInt_t i=0; i<GetNvar(); i++) fAllVarsI[i] =
"NotEnforced";
350 DeclareOptionRef(fAllVarsI, GetNvar(),
"VarProp",
"Categorisation of cuts");
351 AddPreDefVal(
TString(
"NotEnforced"));
354 AddPreDefVal(
TString(
"FSmart"));
365 if (IsNormalised()) {
366 Log() << kWARNING <<
"Normalisation of the input variables for cut optimisation is not" <<
Endl;
367 Log() << kWARNING <<
"supported because this provides intransparent cut values, and no" <<
Endl;
368 Log() << kWARNING <<
"improvement in the performance of the algorithm." <<
Endl;
369 Log() << kWARNING <<
"Please remove \"Normalise\" option from booking option string" <<
Endl;
370 Log() << kWARNING <<
"==> Will reset normalisation flag to \"False\"" <<
Endl;
374 if (IgnoreEventsWithNegWeightsInTraining()) {
375 Log() << kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: "
376 << GetMethodTypeName()
377 <<
" --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string."
381 if (fFitMethodS ==
"MC" ) fFitMethod = kUseMonteCarlo;
382 else if (fFitMethodS ==
"MCEvents") fFitMethod = kUseMonteCarloEvents;
383 else if (fFitMethodS ==
"GA" ) fFitMethod = kUseGeneticAlgorithm;
384 else if (fFitMethodS ==
"SA" ) fFitMethod = kUseSimulatedAnnealing;
385 else if (fFitMethodS ==
"MINUIT" ) {
386 fFitMethod = kUseMinuit;
387 Log() << kWARNING <<
"poor performance of MINUIT in MethodCuts; preferred fit method: GA" <<
Endl;
389 else if (fFitMethodS ==
"EventScan" ) fFitMethod = kUseEventScan;
390 else Log() << kFATAL <<
"unknown minimisation method: " << fFitMethodS <<
Endl;
392 if (fEffMethodS ==
"EFFSEL" ) fEffMethod = kUseEventSelection;
393 else if (fEffMethodS ==
"EFFPDF" ) fEffMethod = kUsePDFs;
394 else fEffMethod = kUseEventSelection;
397 Log() << kINFO <<
Form(
"Use optimization method: \"%s\"",
398 (fFitMethod == kUseMonteCarlo) ?
"Monte Carlo" :
399 (fFitMethod == kUseMonteCarlo) ?
"Monte-Carlo-Event sampling" :
400 (fFitMethod == kUseEventScan) ?
"Full Event Scan (slow)" :
401 (fFitMethod == kUseMinuit) ?
"MINUIT" :
"Genetic Algorithm" ) <<
Endl;
402 Log() << kINFO <<
Form(
"Use efficiency computation method: \"%s\"",
403 (fEffMethod == kUseEventSelection) ?
"Event Selection" :
"PDF" ) <<
Endl;
413 if (fAllVarsI[
ivar] ==
"" || fAllVarsI[
ivar] ==
"NotEnforced")
theFitP = kNotEnforced;
414 else if (fAllVarsI[
ivar] ==
"FMax" )
theFitP = kForceMax;
415 else if (fAllVarsI[
ivar] ==
"FMin" )
theFitP = kForceMin;
416 else if (fAllVarsI[
ivar] ==
"FSmart" )
theFitP = kForceSmart;
418 Log() << kFATAL <<
"unknown value \'" << fAllVarsI[
ivar]
419 <<
"\' for fit parameter option " <<
Form(
"VarProp[%i]",
ivar) <<
Endl;
424 Log() << kINFO <<
"Use \"" << fAllVarsI[
ivar]
425 <<
"\" cuts for variable: " <<
"'" << (*fInputVars)[
ivar] <<
"'" <<
Endl;
438 if (fCutMin ==
NULL || fCutMax ==
NULL || fNbins == 0) {
439 Log() << kFATAL <<
"<Eval_Cuts> fCutMin/Max have zero pointer. "
440 <<
"Did you book Cuts ?" <<
Endl;
446 if (fTestSignalEff > 0) {
448 Int_t ibin = fEffBvsSLocal->FindBin( fTestSignalEff );
450 else if (
ibin >= fNbins)
ibin = fNbins - 1;
474 std::vector<TString>*
varVec = 0;
475 if (GetTransformationHandler().GetNumOfTransformations() == 0) {
477 varVec =
new std::vector<TString>;
479 varVec->push_back( DataInfo().GetVariableInfo(
ivar).GetLabel() );
482 else if (GetTransformationHandler().GetNumOfTransformations() == 1) {
484 varVec = GetTransformationHandler().GetTransformationStringsOfLastTransform();
488 varVec =
new std::vector<TString>;
490 varVec->push_back( DataInfo().GetVariableInfo(
ivar).GetLabel() +
" [transformed]" );
502 Log() << kHEADER <<
"Cut values for requested signal efficiency: " <<
trueEffS <<
Endl;
503 Log() << kINFO <<
"Corresponding background efficiency : " << fEffBvsSLocal->GetBinContent(
ibin ) <<
Endl;
504 if (GetTransformationHandler().GetNumOfTransformations() == 1) {
505 Log() << kINFO <<
"Transformation applied to input variables : \""
506 << GetTransformationHandler().GetNameOfLastTransform() <<
"\"" <<
Endl;
508 else if (GetTransformationHandler().GetNumOfTransformations() > 1) {
509 Log() << kINFO <<
"[ More than one (=" << GetTransformationHandler().GetNumOfTransformations() <<
") "
510 <<
" transformations applied in transformation chain; cuts applied on transformed quantities ] " <<
Endl;
513 Log() << kINFO <<
"Transformation applied to input variables : None" <<
Endl;
519 <<
"Cut[" << std::setw(2) <<
ivar <<
"]: "
522 << std::setw(
maxL) << (*varVec)[
ivar]
538 std::vector<Double_t>
cMin( GetNvar() );
539 std::vector<Double_t>
cMax( GetNvar() );
552 std::vector<Double_t>&
cutMin,
553 std::vector<Double_t>&
cutMax )
const
563 else if (
ibin >= fNbins)
ibin = fNbins - 1;
580 if (fEffMethod == kUsePDFs) CreateVariablePDFs();
583 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
584 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
593 fBinaryTreeB->Fill( GetEventCollection(
Types::kTraining), fBackgroundClass );
623 delete fEffBvsSLocal;
624 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
625 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
626 fEffBvsSLocal->SetDirectory(
nullptr);
632 if (fFitMethod == kUseGeneticAlgorithm ||
633 fFitMethod == kUseMonteCarlo ||
634 fFitMethod == kUseMinuit ||
635 fFitMethod == kUseSimulatedAnnealing) {
638 std::vector<Interval*> ranges;
643 if (DataInfo().GetVariableInfo(
ivar).GetVarType() ==
'I') {
644 nbins =
Int_t(fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin()) + 1;
647 if ((*fFitParams)[
ivar] == kForceSmart) {
648 if ((*fMeanS)[
ivar] > (*fMeanB)[
ivar]) (*fFitParams)[
ivar] = kForceMax;
649 else (*fFitParams)[
ivar] = kForceMin;
652 if ((*fFitParams)[
ivar] == kForceMin) {
653 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMin(), fCutRange[
ivar]->GetMin(), nbins ) );
654 ranges.push_back(
new Interval( 0, fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(), nbins ) );
656 else if ((*fFitParams)[
ivar] == kForceMax) {
657 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMin(), fCutRange[
ivar]->GetMax(), nbins ) );
658 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(),
659 fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(), nbins ) );
662 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMin(), fCutRange[
ivar]->GetMax(), nbins ) );
663 ranges.push_back(
new Interval( 0, fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(), nbins ) );
670 switch (fFitMethod) {
671 case kUseGeneticAlgorithm:
680 case kUseSimulatedAnnealing:
684 Log() << kFATAL <<
"Wrong fit method: " << fFitMethod <<
Endl;
700 else if (fFitMethod == kUseEventScan) {
710 Log() << kINFO <<
"Running full event scan: " <<
Endl;
714 fIPyCurrentIter =
ic;
715 if (fExitFromTraining)
break;
725 else if (fFitMethod == kUseMonteCarloEvents) {
729 DeclareOptionRef(
nsamples,
"SampleSize",
"Number of Monte-Carlo-Event samples" );
730 DeclareOptionRef( seed,
"Seed",
"Seed for the random generator (0 takes random seeds)" );
743 Log() << kINFO <<
"Running Monte-Carlo-Event sampling over " <<
nsamples <<
" events" <<
Endl;
744 std::vector<Double_t> pars( 2*GetNvar() );
747 fIPyCurrentIter =
ic;
748 if (fExitFromTraining)
break;
772 Log() << kFATAL <<
"<MCEvents>: could not find signal events"
773 <<
" after 10000 trials - do you have signal events in your sample ?"
786 EstimatorFunction( pars );
796 else Log() << kFATAL <<
"Unknown minimisation method: " << fFitMethod <<
Endl;
798 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
799 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
805 if ((*fFitParams)[
ivar] == kForceMin && fCutMin[
ivar][
ibin] > -fgMaxAbsCutVal) {
806 fCutMin[
ivar][
ibin] = -fgMaxAbsCutVal;
808 if ((*fFitParams)[
ivar] == kForceMax && fCutMax[
ivar][
ibin] < fgMaxAbsCutVal) {
809 fCutMax[
ivar][
ibin] = fgMaxAbsCutVal;
818 for (
Double_t eff=0.1; eff<0.95; eff += 0.1) PrintCuts( eff+epsilon );
820 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
837 if (!DataInfo().IsSignal(
ev1))
return -1;
840 if (!DataInfo().IsSignal(
ev2))
return -1;
842 const Int_t nvar = GetNvar();
852 std::vector<Double_t> pars;
872 return ComputeEstimator( pars );
880 return ComputeEstimator( pars );
900 this->MatchParsToCuts( pars, &fTmpCutMin[0], &fTmpCutMax[0] );
903 switch (fEffMethod) {
905 this->GetEffsfromPDFs (&fTmpCutMin[0], &fTmpCutMax[0],
effS,
effB);
907 case kUseEventSelection:
908 this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0],
effS,
effB);
911 this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0],
effS,
effB);
941 fEffBvsSLocal->SetBinContent(
ibinS,
effB );
959 diff=(fCutRange[
ivar]->GetMax()-fTmpCutMax[
ivar])/(fCutRange[
ivar]->GetMax()-fCutRange[
ivar]->GetMin());
961 diff=(fCutRange[
ivar]->GetMin()-fTmpCutMin[
ivar])/(fCutRange[
ivar]->GetMax()-fCutRange[
ivar]->GetMin());
966 else return 10.*(1.-10.*
effS);
979 cutMin[
ivar] = ((*fRangeSign)[
ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
980 cutMax[
ivar] = ((*fRangeSign)[
ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
993 const UInt_t nvar = GetNvar();
1036 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1037 fNegEffWarning =
kTRUE;
1041 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1042 fNegEffWarning =
kTRUE;
1059 nSelS = fBinaryTreeS->SearchVolume( volume );
1060 nSelB = fBinaryTreeB->SearchVolume( volume );
1065 nTotS = fBinaryTreeS->GetSumOfWeights();
1066 nTotB = fBinaryTreeB->GetSumOfWeights();
1069 if (nTotS == 0 && nTotB == 0) {
1070 Log() << kFATAL <<
"<GetEffsfromSelection> fatal error in zero total number of events:"
1071 <<
" nTotS, nTotB: " << nTotS <<
" " << nTotB <<
" ***" <<
Endl;
1078 Log() << kWARNING <<
"<ComputeEstimator> zero number of signal events" <<
Endl;
1080 else if (nTotB == 0) {
1083 Log() << kWARNING <<
"<ComputeEstimator> zero number of background events" <<
Endl;
1093 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1094 fNegEffWarning =
kTRUE;
1098 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1099 fNegEffWarning =
kTRUE;
1109 fVarHistS =
new std::vector<TH1*>( GetNvar() );
1110 fVarHistB =
new std::vector<TH1*>( GetNvar() );
1111 fVarHistS_smooth =
new std::vector<TH1*>( GetNvar() );
1112 fVarHistB_smooth =
new std::vector<TH1*>( GetNvar() );
1113 fVarPdfS =
new std::vector<PDF*>( GetNvar() );
1114 fVarPdfB =
new std::vector<PDF*>( GetNvar() );
1124 if( val > minVal ) minVal = val;
1125 if( val < maxVal ) maxVal = val;
1147 histName = (*fInputVars)[
ivar] +
"_bgd";
1163 if( DataInfo().IsSignal(
ev) ){
1164 (*fVarHistS)[
ivar]->Fill( val );
1166 (*fVarHistB)[
ivar]->Fill( val );
1173 (*fVarHistS_smooth)[
ivar] = (
TH1F*)(*fVarHistS)[
ivar]->Clone();
1174 histTitle = (*fInputVars)[
ivar] +
" signal training smoothed ";
1177 histName = (*fInputVars)[
ivar] +
"_sig_smooth";
1178 (*fVarHistS_smooth)[
ivar]->SetName(histName);
1198 histTitle = (*fInputVars)[
ivar]+
" background training smoothed ";
1201 histName = (*fInputVars)[
ivar]+
"_bgd_smooth";
1202 (*fVarHistB_smooth)[
ivar]->SetName(histName);
1223 istr >> dummy >> dummy;
1225 istr >> dummy >> fNbins;
1228 istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >>
dummyInt >> dummy ;
1231 if (
dummyInt != Data()->GetNVariables()) {
1232 Log() << kFATAL <<
"<ReadWeightsFromStream> fatal error: mismatch "
1233 <<
"in number of variables: " <<
dummyInt <<
" != " << Data()->GetNVariables() <<
Endl;
1238 if (fFitMethod == kUseMonteCarlo) {
1239 Log() << kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1241 else if (fFitMethod == kUseMonteCarloEvents) {
1242 Log() << kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1244 else if (fFitMethod == kUseGeneticAlgorithm) {
1245 Log() << kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1247 else if (fFitMethod == kUseSimulatedAnnealing) {
1248 Log() << kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1250 else if (fFitMethod == kUseEventScan) {
1251 Log() << kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1254 Log() << kWARNING <<
"unknown method: " << fFitMethod <<
Endl;
1256 Log() << kINFO <<
"in " << fNbins <<
" signal efficiency bins and for " << GetNvar() <<
" variables" <<
Endl;
1260 istr.getline(buffer,200);
1261 istr.getline(buffer,200);
1265 if (fEffBvsSLocal != 0)
delete fEffBvsSLocal;
1266 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
1267 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1268 fEffBvsSLocal->SetDirectory(
nullptr);
1279 fEffSMin = fEffBvsSLocal->GetBinCenter(1);
1280 fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);
1290 std::vector<Double_t>
cutsMin;
1291 std::vector<Double_t>
cutsMax;
1297 gTools().
AddComment(
wght,
TString::Format(
"Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]", GetNvar() ) );
1330 for (
UInt_t i=0; i<GetNvar(); i++) {
1331 if (fCutMin[i] != 0)
delete [] fCutMin[i];
1332 if (fCutMax[i] != 0)
delete [] fCutMax[i];
1334 if (fCutMin != 0)
delete [] fCutMin;
1335 if (fCutMax != 0)
delete [] fCutMax;
1346 if (fFitMethod == kUseMonteCarlo) {
1347 Log() << kINFO <<
"Read cuts optimised using sample of MC events" <<
Endl;
1349 else if (fFitMethod == kUseMonteCarloEvents) {
1350 Log() << kINFO <<
"Read cuts optimised using sample of MC-Event events" <<
Endl;
1352 else if (fFitMethod == kUseGeneticAlgorithm) {
1353 Log() << kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1355 else if (fFitMethod == kUseSimulatedAnnealing) {
1356 Log() << kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1358 else if (fFitMethod == kUseEventScan) {
1359 Log() << kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1362 Log() << kWARNING <<
"unknown method: " << fFitMethod <<
Endl;
1364 Log() << kINFO <<
"Reading " << fNbins <<
" signal efficiency bins for " << GetNvar() <<
" variables" <<
Endl;
1366 delete fEffBvsSLocal;
1367 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
1368 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1369 fEffBvsSLocal->SetDirectory(
nullptr);
1372 fCutMin =
new Double_t*[GetNvar()];
1373 fCutMax =
new Double_t*[GetNvar()];
1374 for (
UInt_t i=0;i<GetNvar();i++) {
1395 Log() << kFATAL <<
"Mismatch in bins: " <<
tmpbin-1 <<
" >= " << fNbins <<
Endl;
1413 Log() << kINFO <<
"Write monitoring histograms to file: " << BaseDir()->GetPath() <<
Endl;
1415 fEffBvsSLocal->
Write();
1418 if (fEffMethod == kUsePDFs) {
1420 (*fVarHistS)[
ivar]->Write();
1421 (*fVarHistB)[
ivar]->Write();
1422 (*fVarHistS_smooth)[
ivar]->Write();
1423 (*fVarHistB_smooth)[
ivar]->Write();
1424 (*fVarPdfS)[
ivar]->GetPDFHist()->Write();
1425 (*fVarPdfB)[
ivar]->GetPDFHist()->Write();
1447 if (list->GetSize() != 2) {
1448 Log() << kFATAL <<
"<GetTrainingEfficiency> wrong number of arguments"
1450 <<
" | required format, e.g., Efficiency:0.05" <<
Endl;
1463 if (
results->GetHist(
"EFF_BVSS_TR")==0) {
1465 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
1466 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
1471 fBinaryTreeB->Fill( GetEventCollection(
Types::kTraining), fBackgroundClass );
1479 TH1*
eff_bvss_tr =
new TH1F( GetTestvarName() +
"_trainingEffBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1481 TH1*
rej_bvss_tr =
new TH1F( GetTestvarName() +
"_trainingRejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1503 Log()<< kVERBOSE <<
"unable to fill efficiency bin " <<
bini<<
" " <<
effBin <<
Endl;
1522 if (
NULL == fSplTrainEffBvsS)
return 0.0;
1532 effB = fSplTrainEffBvsS->Eval(
effS );
1557 Data()->SetCurrentType(
type);
1564 if (list->GetSize() > 2) {
1566 Log() << kFATAL <<
"<GetEfficiency> wrong number of arguments"
1568 <<
" | required format, e.g., Efficiency:0.05, or empty string" <<
Endl;
1583 if (
results->GetHist(
"MVA_EFF_BvsS")==0) {
1585 if (fBinaryTreeS!=0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
1586 if (fBinaryTreeB!=0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
1592 fBinaryTreeS->Fill( GetEventCollection(
Types::kTesting), fSignalClass );
1594 fBinaryTreeB->Fill( GetEventCollection(
Types::kTesting), fBackgroundClass );
1603 TH1*
eff_BvsS =
new TH1F( GetTestvarName() +
"_effBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1605 TH1*
rej_BvsS =
new TH1F( GetTestvarName() +
"_rejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1613 TH1*
eff_s =
new TH1F( GetTestvarName() +
"_effS", GetTestvarName() +
" (signal)", fNbins,
xmin,
xmax);
1615 TH1*
eff_b =
new TH1F( GetTestvarName() +
"_effB", GetTestvarName() +
" (background)", fNbins,
xmin,
xmax);
1641 tmpBvsS->SetPoint(fNbins+1, 1., 1.);
1657 if (
NULL == fSpleffBvsS)
return 0.0;
1672 integral += (1.0 -
effB);
1694 if (Data()->GetNEvtSigTest() > 0)
1709 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
1710 fout <<
"};" << std::endl;
1728 Log() <<
"The optimisation of rectangular cuts performed by TMVA maximises " <<
Endl;
1729 Log() <<
"the background rejection at given signal efficiency, and scans " <<
Endl;
1730 Log() <<
"over the full range of the latter quantity. Three optimisation" <<
Endl;
1731 Log() <<
"methods are optional: Monte Carlo sampling (MC), a Genetics" <<
Endl;
1732 Log() <<
"Algorithm (GA), and Simulated Annealing (SA). GA and SA are" <<
Endl;
1733 Log() <<
"expected to perform best." <<
Endl;
1735 Log() <<
"The difficulty to find the optimal cuts strongly increases with" <<
Endl;
1736 Log() <<
"the dimensionality (number of input variables) of the problem." <<
Endl;
1737 Log() <<
"This behavior is due to the non-uniqueness of the solution space."<<
Endl;
1741 Log() <<
"If the dimensionality exceeds, say, 4 input variables, it is " <<
Endl;
1742 Log() <<
"advisable to scrutinize the separation power of the variables," <<
Endl;
1743 Log() <<
"and to remove the weakest ones. If some among the input variables" <<
Endl;
1744 Log() <<
"can be described by a single cut (e.g., because signal tends to be" <<
Endl;
1745 Log() <<
"larger than background), this can be indicated to MethodCuts via" <<
Endl;
1746 Log() <<
"the \"Fsmart\" options (see option string). Choosing this option" <<
Endl;
1747 Log() <<
"reduces the number of requirements for the variable from 2 (min/max)" <<
Endl;
1748 Log() <<
"to a single one (TMVA finds out whether it is to be interpreted as" <<
Endl;
1749 Log() <<
"min or max)." <<
Endl;
1752 Log() <<
"" <<
Endl;
1754 Log() <<
"" <<
Endl;
1755 Log() <<
"Apart form the \"Fsmart\" option for the variables, the only way" <<
Endl;
1756 Log() <<
"to improve the MC sampling is to increase the sampling rate. This" <<
Endl;
1757 Log() <<
"is done via the configuration option \"MC_NRandCuts\". The execution" <<
Endl;
1758 Log() <<
"time scales linearly with the sampling rate." <<
Endl;
1759 Log() <<
"" <<
Endl;
1761 Log() <<
"" <<
Endl;
1762 Log() <<
"The algorithm terminates if no significant fitness increase has" <<
Endl;
1763 Log() <<
"been achieved within the last \"nsteps\" steps of the calculation." <<
Endl;
1764 Log() <<
"Wiggles in the ROC curve or constant background rejection of 1" <<
Endl;
1765 Log() <<
"indicate that the GA failed to always converge at the true maximum" <<
Endl;
1766 Log() <<
"fitness. In such a case, it is recommended to broaden the search " <<
Endl;
1767 Log() <<
"by increasing the population size (\"popSize\") and to give the GA " <<
Endl;
1768 Log() <<
"more time to find improvements by increasing the number of steps" <<
Endl;
1769 Log() <<
"(\"nsteps\")" <<
Endl;
1770 Log() <<
" -> increase \"popSize\" (at least >10 * number of variables)" <<
Endl;
1771 Log() <<
" -> increase \"nsteps\"" <<
Endl;
1772 Log() <<
"" <<
Endl;
1773 Log() <<
bold <<
"Simulated Annealing (SA) algorithm:" <<
resbold <<
Endl;
1774 Log() <<
"" <<
Endl;
1775 Log() <<
"\"Increasing Adaptive\" approach:" <<
Endl;
1776 Log() <<
"" <<
Endl;
1777 Log() <<
"The algorithm seeks local minima and explores their neighborhoods, while" <<
Endl;
1778 Log() <<
"changing the ambient temperature depending on the number of failures" <<
Endl;
1779 Log() <<
"in the previous steps. The performance can be improved by increasing" <<
Endl;
1780 Log() <<
"the number of iteration steps (\"MaxCalls\"), or by adjusting the" <<
Endl;
1781 Log() <<
"minimal temperature (\"MinTemperature\"). Manual adjustments of the" <<
Endl;
1782 Log() <<
"speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" <<
Endl;
1783 Log() <<
"to individual data sets should also help. Summary:" <<
brk <<
Endl;
1784 Log() <<
" -> increase \"MaxCalls\"" <<
brk <<
Endl;
1785 Log() <<
" -> adjust \"MinTemperature\"" <<
brk <<
Endl;
1786 Log() <<
" -> adjust \"TemperatureScale\"" <<
brk <<
Endl;
1787 Log() <<
" -> adjust \"AdaptiveSpeed\"" <<
Endl;
1788 Log() <<
"" <<
Endl;
1789 Log() <<
"\"Decreasing Adaptive\" approach:" <<
Endl;
1790 Log() <<
"" <<
Endl;
1791 Log() <<
"The algorithm calculates the initial temperature (based on the effect-" <<
Endl;
1792 Log() <<
"iveness of large steps) and the multiplier that ensures to reach the" <<
Endl;
1793 Log() <<
"minimal temperature with the requested number of iteration steps." <<
Endl;
1794 Log() <<
"The performance can be improved by adjusting the minimal temperature" <<
Endl;
1795 Log() <<
" (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" <<
brk <<
Endl;
1796 Log() <<
" -> increase \"MaxCalls\"" <<
brk <<
Endl;
1797 Log() <<
" -> adjust \"MinTemperature\"" <<
Endl;
1798 Log() <<
" " <<
Endl;
1799 Log() <<
"Other kernels:" <<
Endl;
1800 Log() <<
"" <<
Endl;
1801 Log() <<
"Alternative ways of counting the temperature change are implemented. " <<
Endl;
1802 Log() <<
"Each of them starts with the maximum temperature (\"MaxTemperature\")" <<
Endl;
1803 Log() <<
"and decreases while changing the temperature according to a given" <<
Endl;
1804 Log() <<
"prescription:" <<
brk <<
Endl;
1805 Log() <<
"CurrentTemperature =" <<
brk <<
Endl;
1806 Log() <<
" - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" <<
brk <<
Endl;
1807 Log() <<
" - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" <<
brk <<
Endl;
1808 Log() <<
" - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" <<
brk <<
Endl;
1809 Log() <<
" - Sin: (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" <<
brk <<
Endl;
1810 Log() <<
" - Geo: CurrentTemperature * TemperatureScale" <<
Endl;
1811 Log() <<
"" <<
Endl;
1812 Log() <<
"Their performance can be improved by adjusting initial temperature" <<
Endl;
1813 Log() <<
"(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," <<
Endl;
1814 Log() <<
"and the multiplier that scales the temperature decrease" <<
Endl;
1815 Log() <<
"(\"TemperatureScale\")" <<
brk <<
Endl;
1816 Log() <<
" -> increase \"MaxCalls\"" <<
brk <<
Endl;
1817 Log() <<
" -> adjust \"InitialTemperature\"" <<
brk <<
Endl;
1818 Log() <<
" -> adjust \"TemperatureScale\"" <<
brk <<
Endl;
1819 Log() <<
" -> adjust \"KernelTemperature\"" <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
A TGraph is an object made of two arrays X and Y with npoints each.
1-D histogram with a float per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
A simple Binary search tree including a volume search method.
Bool_t WriteOptionsReference() const
void CheckForUnusedOptions() const
checks for unused options in option string
Class that contains all the data information.
Base class for TMVA fitters.
void SetIPythonInteractive(bool *ExitFromTraining, UInt_t *fIPyMaxIter_, UInt_t *fIPyCurrentIter_)
Double_t Run()
estimator function interface for fitting
Fitter using a Genetic Algorithm.
The TMVA::Interval Class.
Fitter using Monte Carlo sampling of parameters.
Virtual base class for all MVA methods.
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular ...
Double_t EstimatorFunction(std::vector< Double_t > &) override
returns estimator for "cut fitness" used by GA
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA.
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
void AddWeightsXMLTo(void *parent) const override
create XML description of the optimised cuts (signal efficiency, background efficiency, and the per-variable cut minima/maxima for each efficiency bin)
void DeclareOptions() override
define the options (their key words) that can be set in the option string.
void Train(void) override
training method: here the cuts are optimised for the training sample
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
Double_t GetTrainingEfficiency(const TString &) override
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
void ProcessOptions() override
process user options.
static const Double_t fgMaxAbsCutVal
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
Cuts can only handle classification with 2 classes.
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
void ReadWeightsFromStream(std::istream &i) override
read the cuts from stream
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample
void TestClassification() override
nothing to test
void WriteMonitoringHistosToFile(void) const override
write histograms and PDFs to file for monitoring purposes
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
virtual ~MethodCuts(void)
destructor
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &) override
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
void Init(void) override
default initialisation called by all constructors
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
void ReadWeightsFromXML(void *wghtnode) override
read coefficients from xml weight file
void GetHelpMessage() const override
get help message text
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
void PrintCuts(Double_t effS) const
print cuts
PDF wrapper for histograms; uses user-defined spline interpolation.
Class that is the base class for a vector of results.
Fitter using a Simulated Annealing Algorithm.
Linear interpolation of TGraph.
Timing information for training and evaluation of MVA methods.
Singleton class for Global types used by TMVA.
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Volume for BinarySearchTree.
Collectable string class.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
const char * Data() const
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
void Clone(Ssiz_t nc)
Make self a distinct copy with capacity of at least tot, where tot cannot be smaller than the current...
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.