fScaleWithPreselEff(0),
fLogger( new MsgLogger("DataSetFactory", kINFO) )
std::vector<TTreeFormula*>::const_iterator formIt;

for (formIt = fInputFormulas.begin();     formIt != fInputFormulas.end();     ++formIt) if (*formIt) delete *formIt;
for (formIt = fTargetFormulas.begin();    formIt != fTargetFormulas.end();    ++formIt) if (*formIt) delete *formIt;
for (formIt = fCutFormulas.begin();       formIt != fCutFormulas.end();       ++formIt) if (*formIt) delete *formIt;
for (formIt = fWeightFormula.begin();     formIt != fWeightFormula.end();     ++formIt) if (*formIt) delete *formIt;
for (formIt = fSpectatorFormulas.begin(); formIt != fSpectatorFormulas.end(); ++formIt) if (*formIt) delete *formIt;
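// CreateDataSet: steers the creation of a new dataset. Correlation matrices
// are computed only when the dataset has more than one event and the
// CalcCorrelations option is enabled.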
DataSet* ds = BuildInitialDataSet( dsi, dataInput );

if (ds->GetNEvents() > 1 && fComputeCorrelations) {
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName())
      << "Build DataSet consisting of one Event with dynamically changing variables" << Endl;
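// BuildDynamicDataSet: collect the external links of all variables into one
// vector of pointers, so a single Event can follow externally changing values.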
std::vector<Float_t*>* evdyn = new std::vector<Float_t*>(0);

if (varinfos.empty())
   Log() << kFATAL << Form("Dataset[%s] : ", dsi.GetName())
         << "Dynamic data set cannot be built, since no variable information is present. "
         << "Apparently no variables have been set. This should not happen, please contact the TMVA authors." << Endl;
std::vector<VariableInfo>::iterator it = varinfos.begin(), itEnd = varinfos.end();
for (; it != itEnd; ++it) {
   Float_t* external = (Float_t*)(*it).GetExternalLink();
   if (external == nullptr)
      Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName())
            << "The link to the external variable is NULL while I am trying to build a dynamic data set. "
            << "In this case fTmpEvent from MethodBase HAS TO BE USED in the method to get useful values in variables." << Endl;
   else evdyn->push_back(external);
}
it = spectatorinfos.begin();
for (; it != spectatorinfos.end(); ++it) evdyn->push_back( (Float_t*)(*it).GetExternalLink() );
TMVA::Event* ev = new Event((const std::vector<Float_t*>*&)evdyn, varinfos.size());
std::vector<Event*>* newEventVector = new std::vector<Event*>;
newEventVector->push_back(ev);

ds->SetCurrentEvent( 0 );

delete newEventVector;
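// BuildInitialDataSet: with no input entries, fall back to a dynamic dataset
// holding one Event whose variables point at external memory.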
if (dataInput.GetEntries() == 0) return BuildDynamicDataSet( dsi );

std::vector<TString>* classList = dataInput.GetClassList();
for (std::vector<TString>::iterator it = classList->begin(); it != classList->end(); ++it) {
   // ...
}
InitOptions( dsi, eventCounts, normMode, splitSeed, splitMode, mixMode );

BuildEventVector( dsi, dataInput, tmpEventVector, eventCounts );

DataSet* ds = MixEvents( dsi, tmpEventVector, eventCounts,
                         splitMode, mixMode, normMode, splitSeed );
if (showCollectedOutput) {

   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName()) << "Collected:" << Endl;

   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName()) << "    " // ...
         << " training entries: " << ds->GetNClassEvents( 0, cl ) << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName()) << "    " // ...
         << " testing entries: " << ds->GetNClassEvents( 1, cl ) << Endl;
}
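// CheckTTreeFormula: verify that an expression compiles into a valid
// TTreeFormula and warn about array entries that are unfilled for some events.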
Log() << kFATAL << "Expression " << expression.Data()
      << " could not be resolved to a valid formula. " << Endl;

Log() << kWARNING << "Expression: " << expression.Data()
      << " does not provide data for this event. "
      << "This event is not taken into account. --> please check if you use as a variable "
      << "an entry of an array which is not filled for some events "
      << "(e.g. arr[4] when arr has only 3 elements)." << Endl;
Log() << kWARNING << "If you want to take the event into account you can do something like: "
      << "\"Alt$(arr[4],0)\", so that in cases where arr doesn't have a 4th element, "
      << "0 is taken as an alternative." << Endl;
for (int i = 0, iEnd = ttf->GetNcodes(); i < iEnd; ++i)
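// ChangeToNewTree: the input tree changes while data are copied into the
// local training and testing trees, so every TTreeFormula has to be
// recreated against the new tree.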
Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
      << " create input formulas for tree " << tr->GetName() << Endl;
std::vector<TTreeFormula*>::const_iterator formIt, formItEnd;
for (formIt = fInputFormulas.begin(), formItEnd = fInputFormulas.end(); formIt != formItEnd; ++formIt)
   if (*formIt) delete *formIt;
fInputFormulas.clear();

fInputTableFormulas.clear();
bool firstArrayVar = kTRUE;
int firstArrayVarIndex = -1;
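// Bookkeeping for array-type variables: all variables read from the same
// TTree array share a single formula, and fInputTableFormulas records for
// each flattened variable the pair (formula, index within the array).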
fInputFormulas.emplace_back(ttf);
fInputTableFormulas.emplace_back(std::make_pair(ttf, (Int_t) 0));

// ...
fInputFormulas.push_back(ttf);
// ...
firstArrayVarIndex = i;
// ...
fInputTableFormulas.push_back(std::make_pair(ttf, (Int_t)(i - firstArrayVarIndex)));
if (int(i) - firstArrayVarIndex == arraySize - 1) {
   // reached the last element of the array: reset the bookkeeping
   firstArrayVar      = kTRUE;
   firstArrayVarIndex = -1;
}
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "transform regression targets" << Endl;
for (formIt = fTargetFormulas.begin(), formItEnd = fTargetFormulas.end(); formIt != formItEnd; ++formIt)
   if (*formIt) delete *formIt;
fTargetFormulas.clear();

// ...
fTargetFormulas.push_back( ttf );
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "transform spectator variables" << Endl;
for (formIt = fSpectatorFormulas.begin(), formItEnd = fSpectatorFormulas.end(); formIt != formItEnd; ++formIt)
   if (*formIt) delete *formIt;
fSpectatorFormulas.clear();

// ...
fSpectatorFormulas.push_back( ttf );
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "transform cuts" << Endl;
for (formIt = fCutFormulas.begin(), formItEnd = fCutFormulas.end(); formIt != formItEnd; ++formIt)
   if (*formIt) delete *formIt;
fCutFormulas.clear();

// ...
Bool_t worked = CheckTTreeFormula( ttf, tmpCutExp, hasDollar );
// ...
fCutFormulas.push_back( ttf );
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "transform weights" << Endl;
for (formIt = fWeightFormula.begin(), formItEnd = fWeightFormula.end(); formIt != formItEnd; ++formIt)
   if (*formIt) delete *formIt;
fWeightFormula.clear();

// ...
fWeightFormula.push_back( 0 );
// ...
ttf = new TTreeFormula( "FormulaWeight", tmpWeight, tr );
Bool_t worked = CheckTTreeFormula( ttf, tmpWeight, hasDollar );
// ...
fWeightFormula.push_back( ttf );
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "enable branches" << Endl;

Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName()) << "enable branches: input variables" << Endl;
for (formIt = fInputFormulas.begin(); formIt != fInputFormulas.end(); ++formIt) {
   // ...
}

Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName()) << "enable branches: targets" << Endl;
for (formIt = fTargetFormulas.begin(); formIt != fTargetFormulas.end(); ++formIt) {
   // ...
}

Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "enable branches: spectators" << Endl;
for (formIt = fSpectatorFormulas.begin(); formIt != fSpectatorFormulas.end(); ++formIt) {
   // ...
}

Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "enable branches: cuts" << Endl;
for (formIt = fCutFormulas.begin(); formIt != fCutFormulas.end(); ++formIt) {
   // ...
}

Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "enable branches: weights" << Endl;
for (formIt = fWeightFormula.begin(); formIt != fWeightFormula.end(); ++formIt) {
   // ...
}

Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName()) << "tree initialized" << Endl;
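// CalcMinMax: scan the dataset once and record the minimum and maximum of
// every input variable, regression target and spectator.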
const UInt_t nvar  = ds->GetNVariables();
const UInt_t ntgts = ds->GetNTargets();
const UInt_t nvis  = ds->GetNSpectators();

for (UInt_t ivar=0; ivar<nvar;  ivar++) { min[ivar]   = FLT_MAX; max[ivar]   = -FLT_MAX; }
for (UInt_t ivar=0; ivar<ntgts; ivar++) { tgmin[ivar] = FLT_MAX; tgmax[ivar] = -FLT_MAX; }
for (UInt_t ivar=0; ivar<nvis;  ivar++) { vmin[ivar]  = FLT_MAX; vmax[ivar]  = -FLT_MAX; }
for (Int_t i=0; i<ds->GetNEvents(); i++) {
   const Event* ev = ds->GetEvent(i);
   for (UInt_t ivar=0; ivar<nvar; ivar++) {
      Double_t v = ev->GetValue(ivar);
      if (v < min[ivar]) min[ivar] = v;
      if (v > max[ivar]) max[ivar] = v;
   }
   for (UInt_t itgt=0; itgt<ntgts; itgt++) {
      Double_t v = ev->GetTarget(itgt);
      if (v < tgmin[itgt]) tgmin[itgt] = v;
      if (v > tgmax[itgt]) tgmax[itgt] = v;
   }
   for (UInt_t ivis=0; ivis<nvis; ivis++) {
      Double_t v = ev->GetSpectator(ivis);
      if (v < vmin[ivis]) vmin[ivis] = v;
      if (v > vmax[ivis]) vmax[ivis] = v;
   }
}
for (UInt_t ivar=0; ivar<nvar; ivar++) {
   dsi.GetVariableInfo(ivar).SetMin(min[ivar]);
   dsi.GetVariableInfo(ivar).SetMax(max[ivar]);
   if (TMath::Abs(max[ivar] - min[ivar]) <= FLT_MIN)
      Log() << kWARNING << Form("Dataset[%s] : ", dsi.GetName())
            << "Variable " << dsi.GetVariableInfo(ivar).GetExpression().Data()
            << " is constant. Please remove the variable." << Endl;
}
for (UInt_t ivar=0; ivar<ntgts; ivar++) {
   dsi.GetTargetInfo(ivar).SetMin(tgmin[ivar]);
   dsi.GetTargetInfo(ivar).SetMax(tgmax[ivar]);
   if (TMath::Abs(tgmax[ivar] - tgmin[ivar]) <= FLT_MIN)
      Log() << kFATAL << Form("Dataset[%s] : ", dsi.GetName())
            << "Target " << dsi.GetTargetInfo(ivar).GetExpression().Data()
            << " is constant. Please remove the variable." << Endl;
}
for (UInt_t ivar=0; ivar<nvis; ivar++) {
   dsi.GetSpectatorInfo(ivar).SetMin(vmin[ivar]);
   dsi.GetSpectatorInfo(ivar).SetMax(vmax[ivar]);
}
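// CalcCorrelationMatrix: normalise the covariance matrix to unit diagonal,
// i.e. corr(i,j) = cov(i,j) / sqrt(cov(i,i)*cov(j,j)).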
TMatrixD* mat = CalcCovarianceMatrix( ds, classNumber );

UInt_t nvar = ds->GetNVariables(), ivar, jvar;

for (ivar=0; ivar<nvar; ivar++) {
   for (jvar=0; jvar<nvar; jvar++) {
      Double_t d = (*mat)(ivar, ivar) * (*mat)(jvar, jvar);
      if (d > 0) (*mat)(ivar, jvar) /= sqrt(d);
      else {
         Log() << kWARNING << Form("Dataset[%s] : ", DataSetInfo().GetName())
               << "<GetCorrelationMatrix> Zero variances for variables "
               << "(" << ivar << ", " << jvar << ") = " << d << Endl;
         (*mat)(ivar, jvar) = 0;
      }
   }
}

for (ivar=0; ivar<nvar; ivar++) (*mat)(ivar, ivar) = 1.0;
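// CalcCovarianceMatrix: accumulate weighted first and second moments for one
// class, then form cov(i,j) = E[x_i*x_j] - E[x_i]*E[x_j].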
UInt_t nvar = ds->GetNVariables();
UInt_t ivar = 0, jvar = 0;

for (ivar=0; ivar<nvar; ivar++) {
   vec(ivar) = 0;
   for (jvar=0; jvar<nvar; jvar++) mat2(ivar, jvar) = 0;
}

// perform event loop
for (Int_t i=0; i<ds->GetNEvents(); i++) {

   const Event* ev = ds->GetEvent(i);
   if (ev->GetClass() != classNumber) continue;

   Double_t weight = ev->GetWeight();
   ic += weight;

   for (ivar=0; ivar<nvar; ivar++) {

      Double_t xi = ev->GetValue(ivar);
      vec(ivar)        += xi*weight;
      mat2(ivar, ivar) += (xi*xi*weight);

      for (jvar=ivar+1; jvar<nvar; jvar++) {
         Double_t xj = ev->GetValue(jvar);
         mat2(ivar, jvar) += (xi*xj*weight);
      }
   }
}

// fill the symmetric part of the matrix
for (ivar=0; ivar<nvar; ivar++)
   for (jvar=ivar+1; jvar<nvar; jvar++)
      mat2(jvar, ivar) = mat2(ivar, jvar);

for (ivar=0; ivar<nvar; ivar++) {
   for (jvar=0; jvar<nvar; jvar++) {
      (*mat)(ivar, jvar) = mat2(ivar, jvar)/ic - vec(ivar)*vec(jvar)/(ic*ic);
   }
}
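// InitOptions: declare and document all options steering the dataset
// splitting (split mode, mix mode, seed, weight normalisation and the
// per-class event requests).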
splitSpecs.SetConfigDescription( "Configuration options given in the \"PrepareForTrainingAndTesting\" call; these options define the creation of the data sets used for training and expert validation by TMVA" );

splitMode = "Random";
splitSpecs.DeclareOptionRef( splitMode, "SplitMode",
                             "Method of picking training and testing events (default: random)" );

mixMode = "SameAsSplitMode";
splitSpecs.DeclareOptionRef( mixMode, "MixMode",
                             "Method of mixing events of different classes into one dataset (default: SameAsSplitMode)" );

splitSpecs.DeclareOptionRef( splitSeed, "SplitSeed",
                             "Seed for random event shuffling" );

normMode = "EqualNumEvents";
splitSpecs.DeclareOptionRef( normMode, "NormMode",
                             "Overall renormalisation of event-by-event weights used in the training (NumEvents: average weight of 1 per event, independently for signal and background; EqualNumEvents: average weight of 1 per event for signal, and sum of weights for background equal to sum of weights for signal)" );
splitSpecs.DeclareOptionRef( fScaleWithPreselEff=kFALSE, "ScaleWithPreselEff",
                             "Scale the number of requested events by the eff. of the preselection cuts (or not)" );

splitSpecs.DeclareOptionRef( nEventRequests.at(cl).nTrainingEventsRequested, TString("nTrain_")+clName, titleTrain );
splitSpecs.DeclareOptionRef( nEventRequests.at(cl).nTestingEventsRequested , TString("nTest_")+clName , titleTest  );
splitSpecs.DeclareOptionRef( nEventRequests.at(cl).TrainTestSplitRequested , TString("TrainTestSplit_")+clName, titleTest );
splitSpecs.DeclareOptionRef( fVerboseLevel=TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)" );

fCorrelations = kTRUE;
splitSpecs.DeclareOptionRef(fCorrelations, "Correlations", "Boolean to show correlation output (Default: true)");
fComputeCorrelations = kTRUE;
splitSpecs.DeclareOptionRef(fComputeCorrelations, "CalcCorrelations", "Compute correlations and also some variable statistics, e.g. min/max (Default: true)");
if (Verbose()) fLogger->SetMinType( kVERBOSE );
if (fVerboseLevel.CompareTo("Debug")   == 0) fLogger->SetMinType( kDEBUG );
if (fVerboseLevel.CompareTo("Verbose") == 0) fLogger->SetMinType( kVERBOSE );
if (fVerboseLevel.CompareTo("Info")    == 0) fLogger->SetMinType( kINFO );
Log() // ...
      << "\tSplitmode is: \"" << splitMode << "\" the mixmode is: \"" << mixMode << "\"" << Endl;
if (mixMode == "SAMEASSPLITMODE") mixMode = splitMode;
else if (mixMode != splitMode)
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << "DataSet splitmode=" << splitMode
         << " differs from mixmode=" << mixMode << Endl;
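// BuildEventVector: loop over all classes and input trees, evaluate the
// input, target, spectator, cut and weight formulas for every entry, and
// fill the per-class event vectors (flattening array variables on the way).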
for (size_t i=0; i<nclasses; i++) {
   eventCounts[i].varAvLength = new Float_t[nvars];
   for (UInt_t ivar=0; ivar<nvars; ivar++)
      eventCounts[i].varAvLength[ivar] = 0;
}
std::map<TString, int> nanInfWarnings;
std::map<TString, int> nanInfErrors;

for (UInt_t cl=0; cl<nclasses; cl++) {

   EventStats& classEventCounts = eventCounts[cl];

   std::vector<Float_t> vars(nvars);
   std::vector<Float_t> tgts(ntgts);
   std::vector<Float_t> vis(nvis);
ChangeToNewTree( currentInfo, dsi );

// loop over events in the tree
for (Long64_t evtIdx = 0; evtIdx < nEvts; evtIdx++) {

   ChangeToNewTree( currentInfo, dsi );

   Int_t sizeOfArrays = 1;
   Int_t prevArrExpr  = 0;
for (UInt_t ivar = 0; ivar < nvars; ivar++) {

   auto inputFormula = fInputTableFormulas[ivar].first;

   Int_t ndata = inputFormula->GetNdata();

   if (ndata == 1) continue;
   haveAllArrayData = kTRUE;

   if (sizeOfArrays == 1) {
      sizeOfArrays = ndata;
      prevArrExpr  = ivar;
   }
   else if (sizeOfArrays != ndata) {
      Log() << kERROR << Form("Dataset[%s] : ", dsi.GetName())
            << "ERROR while preparing training and testing trees:" << Endl;
      Log() << Form("Dataset[%s] : ", dsi.GetName())
            << "  multiple array-type expressions of different length were encountered" << Endl;
      Log() << Form("Dataset[%s] : ", dsi.GetName())
            << "  location of error: event " << evtIdx
            << " in tree " << currentInfo.GetTree()->GetName()
            << " of file " << currentInfo.GetTree()->GetCurrentFile()->GetName() << Endl;
      Log() << Form("Dataset[%s] : ", dsi.GetName())
            << "  expression " << inputFormula->GetTitle() << " has "
            << Form("Dataset[%s] : ", dsi.GetName()) << ndata << " entries, while" << Endl;
      Log() << Form("Dataset[%s] : ", dsi.GetName())
            << "  expression " << fInputTableFormulas[prevArrExpr].first->GetTitle() << " has "
            << Form("Dataset[%s] : ", dsi.GetName()) << fInputTableFormulas[prevArrExpr].first->GetNdata() << " entries" << Endl;
      Log() << kFATAL << Form("Dataset[%s] : ", dsi.GetName()) << "Need to abort" << Endl;
   }
}
for (Int_t idata = 0; idata < sizeOfArrays; idata++) {
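// Helper: count NaN/inf occurrences per message in a map and flag the event,
// so that identical warnings are later printed once with a multiplicity.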
auto checkNanInf = [&](std::map<TString, int> &msgMap, Float_t value, const char *what, const char *formulaTitle) {
   if (TMath::IsNaN(value)) {
      contains_NaN_or_inf = kTRUE;
      ++msgMap[TString::Format("Dataset[%s] : %s expression resolves to indeterminate value (NaN): %s", dsi.GetName(), what, formulaTitle)];
   }
   else if (!TMath::Finite(value)) {
      contains_NaN_or_inf = kTRUE;
      ++msgMap[TString::Format("Dataset[%s] : %s expression resolves to infinite value (+inf or -inf): %s", dsi.GetName(), what, formulaTitle)];
   }
};
formula = fCutFormulas[cl];
if (formula) {
   // ...
   checkNanInf(nanInfErrors, cutVal, "Cut", formula->GetTitle());
}

// a NaN/inf is only a warning if the cut rejects the event anyway
auto &nanMessages = cutVal < 0.5 ? nanInfWarnings : nanInfErrors;
for (UInt_t ivar=0; ivar<nvars; ivar++) {

   auto formulaMap = fInputTableFormulas[ivar];
   formula = formulaMap.first;
   int inputVarIndex = formulaMap.second;

   // ...
   if (ndata < arraySize) {
      Log() // ...
            << " in the current tree " << currentInfo.GetTree()->GetName()
            << " for the event " << evtIdx
            << " is " << ndata << " instead of " << arraySize << Endl;
   }
   else if (ndata > arraySize && !foundLargerArraySize) {
      Log() // ...
            << " in the current tree " << currentInfo.GetTree()->GetName()
            << " for the event " << evtIdx
            << " is " << ndata << ", larger than " << arraySize << Endl;
      Log() << kWARNING << "Some data will then be ignored. This WARNING is printed only once, "
            << " check in case for the other variables and events " << Endl;

      foundLargerArraySize = kTRUE;
   }

   vars[ivar] = ( !haveAllArrayData ?
                  formula->EvalInstance(idata) :
                  formula->EvalInstance(inputVarIndex) );

   checkNanInf(nanMessages, vars[ivar], "Input", formula->GetTitle());
}
for (UInt_t itrgt=0; itrgt<ntgts; itrgt++) {
   formula = fTargetFormulas[itrgt];
   ndata = formula->GetNdata();
   tgts[itrgt] = (ndata == 1 ?
                  formula->EvalInstance() :
                  formula->EvalInstance(idata));
   checkNanInf(nanMessages, tgts[itrgt], "Target", formula->GetTitle());
}

for (UInt_t itVis=0; itVis<nvis; itVis++) {
   formula = fSpectatorFormulas[itVis];
   ndata = formula->GetNdata();
   vis[itVis] = (ndata == 1 ?
                 formula->EvalInstance() :
                 formula->EvalInstance(idata));
   checkNanInf(nanMessages, vis[itVis], "Spectator", formula->GetTitle());
}

formula = fWeightFormula[cl];
if (formula) {
   ndata = formula->GetNdata();
   weight *= (ndata == 1 ?
              formula->EvalInstance() :
              formula->EvalInstance(idata));
   checkNanInf(nanMessages, weight, "Weight", formula->GetTitle());
}
if (cutVal < 0.5) continue;

if (contains_NaN_or_inf) {
   Log() << kWARNING << Form("Dataset[%s] : ", dsi.GetName()) << "NaN or +-inf in Event " << evtIdx << Endl;
   if (sizeOfArrays > 1)
      Log() << kWARNING << Form("Dataset[%s] : ", dsi.GetName()) << " rejected" << Endl;
}

event_v.push_back( new Event(vars, tgts, vis, cl, weight) );
if (!nanInfWarnings.empty()) {
   Log() << kWARNING << "Found events with NaN and/or +-inf values" << Endl;
   for (const auto &warning : nanInfWarnings) {
      auto &log = Log() << kWARNING << warning.first;
      if (warning.second > 1) log << " (" << warning.second << " times)";
      log << Endl;
   }
   Log() << kWARNING << "These NaN and/or +-infs were all removed by the specified cut, continuing." << Endl;
}

if (!nanInfErrors.empty()) {
   Log() << kWARNING << "Found events with NaN and/or +-inf values (not removed by cut)" << Endl;
   for (const auto &error : nanInfErrors) {
      auto &log = Log() << kWARNING << error.first;
      if (error.second > 1) log << " (" << error.second << " times)";
      log << Endl;
   }
   Log() << kFATAL << "How am I supposed to train a NaN or +-inf?!" << Endl;
}
Log() << kHEADER << Form("[%s] : ", dsi.GetName()) << "Number of events in input trees" << Endl;
Log() << kDEBUG << "(after possible flattening of arrays):" << Endl;

Log() // ...
      << " -- number of events : "
      << std::setw(5) << eventCounts[cl].nEvBeforeCut
      << " / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvBeforeCut << Endl;

Log() // ...
      << " tree -- total number of entries: " /* ... */;

if (fScaleWithPreselEff)
   Log() // ...
         << "\tPreselection: (will affect number of requested training and testing events)" << Endl;
else
   Log() // ...
         << "\tPreselection: (will NOT affect number of requested training and testing events)" << Endl;

Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName()) << "    " // ...
      << " -- number of events passed: "
      << std::setw(5) << eventCounts[cl].nEvAfterCut
      << " / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvAfterCut << Endl;
Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName()) << "    " // ...
      << " -- efficiency : "
      << std::setw(6) << eventCounts[cl].nWeEvAfterCut/eventCounts[cl].nWeEvBeforeCut << Endl;

// ...
else Log() << kDEBUG
      << " No preselection cuts applied on event classes" << Endl;
if (splitMode.Contains("RANDOM")) {

   if (!unspecifiedEvents.empty()) {
      Log() << kDEBUG << "randomly shuffling "
            << unspecifiedEvents.size()
            << " events of class " << cls
            << " which are not yet associated to testing or training" << Endl;
      std::shuffle(unspecifiedEvents.begin(), unspecifiedEvents.end(), rndm);
   }
}
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "SPLITTING ========" << Endl;

Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "---- class " << cls << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "check number of training/testing events, requested and available number of events and for class " << cls << Endl;
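// MixEvents: decide per class how many events go into training and testing,
// drawing still unassigned ("undefined") events as needed.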
Int_t availableTraining  = eventVectorTraining.size();
Int_t availableTesting   = eventVectorTesting.size();
Int_t availableUndefined = eventVectorUndefined.size();

if (fScaleWithPreselEff) {
   presel_scale = eventCounts[cls].cutScaling();
   if (presel_scale < 1)
      Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
            << " you have opted for scaling the number of requested training/testing events\n to be scaled by the preselection efficiency" << Endl;
}
else {
   if (eventCounts[cls].cutScaling() < 1)
      Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
            << " you have opted for interpreting the requested number of training/testing events\n to be the number of events AFTER your preselection cuts" << Endl;
}

if (eventCounts[cls].TrainTestSplitRequested < 1.0 && eventCounts[cls].TrainTestSplitRequested > 0.0) {
   eventCounts[cls].nTrainingEventsRequested =
      Int_t(eventCounts[cls].TrainTestSplitRequested*(availableTraining+availableTesting+availableUndefined));
   eventCounts[cls].nTestingEventsRequested = Int_t(0);
}
else if (eventCounts[cls].TrainTestSplitRequested != 0.0)
   Log() << kFATAL
         << Form("The option TrainTestSplit_<class> has to be in range (0, 1) but is set to %f.", eventCounts[cls].TrainTestSplitRequested) << Endl;

Int_t requestedTraining = Int_t(eventCounts[cls].nTrainingEventsRequested * presel_scale);
Int_t requestedTesting  = Int_t(eventCounts[cls].nTestingEventsRequested  * presel_scale);
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "events in training trees    : " << availableTraining  << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "events in testing trees     : " << availableTesting   << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "events in unspecified trees : " << availableUndefined << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "requested for training      : " << requestedTraining  << Endl;

Log() << " ( " << eventCounts[cls].nTrainingEventsRequested
      << " * " << presel_scale << " preselection efficiency)" << Endl;

Log() << kDEBUG << "requested for testing       : " << requestedTesting;

Log() << " ( " << eventCounts[cls].nTestingEventsRequested
      << " * " << presel_scale << " preselection efficiency)" << Endl;
Int_t useForTesting(0), useForTraining(0);
Int_t allAvailable(availableUndefined + availableTraining + availableTesting);
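// If neither nTrain nor nTest was requested, all available events are split
// as evenly as possible. Example: availableTraining=100, availableTesting=50,
// availableUndefined=70 gives allAvailable=220; since 70 >= |100-50|, the
// first branch below yields useForTraining = useForTesting = 220/2 = 110.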
if ((requestedTraining == 0) && (requestedTesting == 0)) {

   if (availableUndefined >= TMath::Abs(availableTraining - availableTesting)) {
      useForTraining = useForTesting = allAvailable/2;
   }
   else {
      useForTraining = availableTraining;
      useForTesting  = availableTesting;
      if (availableTraining < availableTesting)
         useForTraining += availableUndefined;
      else
         useForTesting += availableUndefined;
   }
   requestedTraining = useForTraining;
   requestedTesting  = useForTesting;
}
else if (requestedTesting == 0) {
   useForTraining = TMath::Max(requestedTraining, availableTraining);
   if (allAvailable < useForTraining) {
      Log() << kFATAL << Form("Dataset[%s] : ", dsi.GetName())
            << "More events requested for training (" << requestedTraining
            << ") than available (" << allAvailable << ")!" << Endl;
   }
   useForTesting    = allAvailable - useForTraining;
   requestedTesting = useForTesting;
}
else if (requestedTraining == 0) {
   useForTesting = TMath::Max(requestedTesting, availableTesting);
   if (allAvailable < useForTesting) {
      Log() << kFATAL << Form("Dataset[%s] : ", dsi.GetName())
            << "More events requested for testing (" << requestedTesting
            << ") than available (" << allAvailable << ")!" << Endl;
   }
   useForTraining    = allAvailable - useForTesting;
   requestedTraining = useForTraining;
}
Int_t stillNeedForTraining = TMath::Max(requestedTraining - availableTraining, 0);
Int_t stillNeedForTesting  = TMath::Max(requestedTesting  - availableTesting,  0);

int NFree = availableUndefined - stillNeedForTraining - stillNeedForTesting;
if (NFree < 0) NFree = 0;
useForTraining = TMath::Max(requestedTraining, availableTraining) + NFree/2;
useForTesting  = allAvailable - useForTraining;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "determined event sample size to select training sample from=" << useForTraining << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "determined event sample size to select test sample from=" << useForTesting << Endl;
if (splitMode == "ALTERNATE") {
   Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "split 'ALTERNATE'" << Endl;
   Int_t nTraining = availableTraining;
   Int_t nTesting  = availableTesting;
   for (EventVector::iterator it = eventVectorUndefined.begin(), itEnd = eventVectorUndefined.end(); it != itEnd; ) {
      // ...
      if (nTraining <= requestedTraining) {
         eventVectorTraining.insert( eventVectorTraining.end(), (*it) );
         // ...
      }
      else {
         eventVectorTesting.insert( eventVectorTesting.end(), (*it) );
         // ...
      }
   }
}
else {
   Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "split '" << splitMode << "'" << Endl;
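// Non-alternate splits take the extra events from the front of the undefined
// pool; for RANDOM splits that pool was already shuffled above.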
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "availableundefined : " << availableUndefined << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "useForTraining     : " << useForTraining << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "useForTesting      : " << useForTesting << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "availableTraining  : " << availableTraining << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "availableTesting   : " << availableTesting << Endl;
if (availableUndefined < (useForTraining - availableTraining) ||
    availableUndefined < (useForTesting  - availableTesting ) ||
    availableUndefined < (useForTraining + useForTesting - availableTraining - availableTesting)) {
   Log() << kFATAL << Form("Dataset[%s] : ", dsi.GetName()) << "More events requested than available!" << Endl;
}

if (useForTraining > availableTraining) {
   eventVectorTraining.insert( eventVectorTraining.end(), eventVectorUndefined.begin(), eventVectorUndefined.begin() + useForTraining - availableTraining );
   eventVectorUndefined.erase( eventVectorUndefined.begin(), eventVectorUndefined.begin() + useForTraining - availableTraining );
}
if (useForTesting > availableTesting) {
   eventVectorTesting.insert( eventVectorTesting.end(), eventVectorUndefined.begin(), eventVectorUndefined.begin() + useForTesting - availableTesting );
}
eventVectorUndefined.clear();
if (splitMode.Contains("RANDOM")) {
   UInt_t sizeTraining = eventVectorTraining.size();
   if (sizeTraining > UInt_t(requestedTraining)) {
      std::vector<UInt_t> indicesTraining( sizeTraining );
      // ...
      std::shuffle(indicesTraining.begin(), indicesTraining.end(), rndm);
      indicesTraining.erase( indicesTraining.begin() + sizeTraining - UInt_t(requestedTraining), indicesTraining.end() );
      for (std::vector<UInt_t>::iterator it = indicesTraining.begin(), itEnd = indicesTraining.end(); it != itEnd; ++it) {
         delete eventVectorTraining.at( (*it) );
         eventVectorTraining.at( (*it) ) = NULL;
      }
      eventVectorTraining.erase( std::remove( eventVectorTraining.begin(), eventVectorTraining.end(), (void*)NULL ), eventVectorTraining.end() );
   }

   UInt_t sizeTesting = eventVectorTesting.size();
   if (sizeTesting > UInt_t(requestedTesting)) {
      std::vector<UInt_t> indicesTesting( sizeTesting );
      // ...
      std::shuffle(indicesTesting.begin(), indicesTesting.end(), rndm);
      indicesTesting.erase( indicesTesting.begin() + sizeTesting - UInt_t(requestedTesting), indicesTesting.end() );
      for (std::vector<UInt_t>::iterator it = indicesTesting.begin(), itEnd = indicesTesting.end(); it != itEnd; ++it) {
         delete eventVectorTesting.at( (*it) );
         eventVectorTesting.at( (*it) ) = NULL;
      }
      eventVectorTesting.erase( std::remove( eventVectorTesting.begin(), eventVectorTesting.end(), (void*)NULL ), eventVectorTesting.end() );
   }
}
if (eventVectorTraining.size() < UInt_t(requestedTraining))
   Log() << kWARNING << Form("Dataset[%s] : ", dsi.GetName())
         << "DataSetFactory/requested number of training samples larger than size of eventVectorTraining.\n"
         << "There is probably an issue. Please contact the TMVA developers." << Endl;
std::for_each( eventVectorTraining.begin()+requestedTraining, eventVectorTraining.end(), DeleteFunctor<Event>() );
eventVectorTraining.erase( eventVectorTraining.begin()+requestedTraining, eventVectorTraining.end() );

if (eventVectorTesting.size() < UInt_t(requestedTesting))
   Log() << kWARNING << Form("Dataset[%s] : ", dsi.GetName())
         << "DataSetFactory/requested number of testing samples larger than size of eventVectorTesting.\n"
         << "There is probably an issue. Please contact the TMVA developers." << Endl;
std::for_each( eventVectorTesting.begin()+requestedTesting, eventVectorTesting.end(), DeleteFunctor<Event>() );
eventVectorTesting.erase( eventVectorTesting.begin()+requestedTesting, eventVectorTesting.end() );
Int_t trainingSize = 0;
Int_t testingSize  = 0;

trainingEventVector->reserve( trainingSize );
testingEventVector->reserve( testingSize );
Log() << kDEBUG << " MIXING ============= " << Endl;
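// ALTERNATE mixing interleaves the per-class event vectors (one event of
// each class in turn); any other mix mode simply concatenates them.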
if (mixMode == "ALTERNATE") {

   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << "Training sample: You are trying to mix events in alternate mode although the classes have different event numbers. This works but the alternation stops at the last event of the smaller class." << Endl;

   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << "Testing sample: You are trying to mix events in alternate mode although the classes have different event numbers. This works but the alternation stops at the last event of the smaller class." << Endl;
typedef EventVector::iterator EvtVecIt;
EvtVecIt itEvent, itEventEnd;

Log() << kDEBUG << "insert class 0 into training and test vector" << Endl;
// ...
testingEventVector->insert( testingEventVector->end(),
                            tmpEventVector[Types::kTesting].at(0).begin(),
                            tmpEventVector[Types::kTesting].at(0).end() );
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "insert class " << cls << Endl;

itTarget = trainingEventVector->begin() - 1;

for (itEvent    = tmpEventVector[Types::kTraining].at(cls).begin(),
     itEventEnd = tmpEventVector[Types::kTraining].at(cls).end(); itEvent != itEventEnd; ++itEvent) {
   // ...
   if ((trainingEventVector->end() - itTarget) < Int_t(cls+1)) {
      itTarget = trainingEventVector->end();
      trainingEventVector->insert( itTarget, itEvent, itEventEnd );
      // ...
   }
   else {
      trainingEventVector->insert( itTarget, (*itEvent) );
   }
}

itTarget = testingEventVector->begin() - 1;

for (itEvent    = tmpEventVector[Types::kTesting].at(cls).begin(),
     itEventEnd = tmpEventVector[Types::kTesting].at(cls).end(); itEvent != itEventEnd; ++itEvent) {
   // ...
   if ((testingEventVector->end() - itTarget) < Int_t(cls+1)) {
      itTarget = testingEventVector->end();
      testingEventVector->insert( itTarget, itEvent, itEventEnd );
      // ...
   }
   else {
      testingEventVector->insert( itTarget, (*itEvent) );
   }
}
trainingEventVector->insert( trainingEventVector->end(), tmpEventVector[Types::kTraining].at(cls).begin(), tmpEventVector[Types::kTraining].at(cls).end() );
testingEventVector->insert ( testingEventVector->end(),  tmpEventVector[Types::kTesting].at(cls).begin(),  tmpEventVector[Types::kTesting].at(cls).end() );
if (mixMode == "RANDOM") {
   Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "shuffling events" << Endl;

   std::shuffle(trainingEventVector->begin(), trainingEventVector->end(), rndm);
   std::shuffle(testingEventVector->begin(),  testingEventVector->end(),  rndm);
}
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "trainingEventVector " << trainingEventVector->size() << Endl;
Log() << kDEBUG << Form("Dataset[%s] : ", dsi.GetName()) << "testingEventVector  " << testingEventVector->size() << Endl;
if (ds->GetNTrainingEvents() < 1) {
   Log() << kFATAL << "Dataset " << std::string(dsi.GetName())
         << " does not have any training events, I better stop here and let you fix that one first " << Endl;
}

if (ds->GetNTestEvents() < 1) {
   Log() << kERROR << "Dataset " << std::string(dsi.GetName())
         << " does not have any testing events, guess that will cause problems later..but for now, I continue " << Endl;
}

delete trainingEventVector;
delete testingEventVector;
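// RenormEvents: renormalise the TRAINING event weights according to the
// chosen NormMode; testing weights are never rescaled.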
Int_t trainingSize = 0;
Int_t testingSize  = 0;

Double_t trainingSumSignalWeights = 0;
Double_t trainingSumBackgrWeights = 0;
Double_t testingSumSignalWeights  = 0;
Double_t testingSumBackgrWeights  = 0;
trainingSizePerClass.at(cls) = tmpEventVector[Types::kTraining].at(cls).size();
testingSizePerClass.at(cls)  = tmpEventVector[Types::kTesting].at(cls).size();

trainingSize += trainingSizePerClass.back();
testingSize  += testingSizePerClass.back();

trainingSumWeightsPerClass.at(cls) = /* ... */;

testingSumWeightsPerClass.at(cls) = /* ... */;

// signal classes
trainingSumSignalWeights += trainingSumWeightsPerClass.at(cls);
testingSumSignalWeights  += testingSumWeightsPerClass.at(cls);
// background classes
trainingSumBackgrWeights += trainingSumWeightsPerClass.at(cls);
testingSumBackgrWeights  += testingSumWeightsPerClass.at(cls);
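// Three normalisation modes are supported: NONE (keep the original weights),
// NumEvents (average weight of 1 per event, per class) and EqualNumEvents
// (every class renormalised to the effective number of events of class 0).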
if (normMode == "NONE") {
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << "No weight renormalisation applied: use original global and event weights" << Endl;
   // ...
}
else if (normMode == "NUMEVENTS") {
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << "\tWeight renormalisation mode: \"NumEvents\": renormalises all event classes " << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << " such that the effective (weighted) number of events in each class equals the respective " << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << " number of events (entries) that you demanded in PrepareTrainingAndTestTree(\"\",\"nTrain_Signal=.. )" << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << " ... i.e. such that Sum[i=1..N_j]{w_i} = N_j, j=0,1,2..." << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << " ... (note that N_j is the sum of TRAINING events (nTrain_j...with j=Signal,Background.." << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << " ..... Testing events are not renormalised nor included in the renormalisation factor! )" << Endl;

   renormFactor.at(cls) = ((Float_t)trainingSizePerClass.at(cls)) /
                          (trainingSumWeightsPerClass.at(cls));
}
else if (normMode == "EQUALNUMEVENTS") {

   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << "Weight renormalisation mode: \"EqualNumEvents\": renormalises all event classes ..." << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << " such that the effective (weighted) number of events in each class is the same " << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << " (and equals the number of events (entries) given for class=0 )" << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << "... i.e. such that Sum[i=1..N_j]{w_i} = N_classA, j=classA, classB, ..." << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << "... (note that N_j is the sum of TRAINING events" << Endl;
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName())
         << " ..... Testing events are not renormalised nor included in the renormalisation factor!)" << Endl;

   UInt_t referenceClass = 0;
   renormFactor.at(cls) = Float_t(trainingSizePerClass.at(referenceClass)) /
                          (trainingSumWeightsPerClass.at(cls));
}
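// Example: if class 0 has 1000 training events and another class has
// training weights summing to 500, renormFactor = 1000/500 = 2 and every
// training weight of that class is doubled below.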
else
   Log() << kFATAL << Form("Dataset[%s] : ", dsi.GetName())
         << "<PrepareForTrainingAndTesting> Unknown NormMode: " << normMode << Endl;
Log() // ...
      << "--> Rescale " << setiosflags(ios::left) << std::setw(maxL) /* ... */;

for (EventVector::iterator it    = tmpEventVector[Types::kTraining].at(cls).begin(),
                           itEnd = tmpEventVector[Types::kTraining].at(cls).end(); it != itEnd; ++it) {
   (*it)->SetWeight( (*it)->GetWeight() * renormFactor.at(cls) );
}
Log() // ...
      << "Number of training and testing events" << Endl;
Log() << kDEBUG << "\tafter rescaling:" << Endl;
Log() // ...
      << "---------------------------------------------------------------------------" << Endl;

trainingSumSignalWeights = 0;
trainingSumBackgrWeights = 0;
testingSumSignalWeights  = 0;
testingSumBackgrWeights  = 0;

trainingSumWeightsPerClass.at(cls) = /* ... */;

testingSumWeightsPerClass.at(cls) = /* ... */;

trainingSumSignalWeights += trainingSumWeightsPerClass.at(cls);
testingSumSignalWeights  += testingSumWeightsPerClass.at(cls);

trainingSumBackgrWeights += trainingSumWeightsPerClass.at(cls);
testingSumBackgrWeights  += testingSumWeightsPerClass.at(cls);

Log() // ...
      << setiosflags(ios::left) << std::setw(maxL) // ...
      << "training events            : " << trainingSizePerClass.at(cls) << Endl;
Log() << kDEBUG << "\t(sum of weights: " << trainingSumWeightsPerClass.at(cls) << ")"
      << " - requested were " << eventCounts[cls].nTrainingEventsRequested << " events" << Endl;

Log() // ...
      << setiosflags(ios::left) << std::setw(maxL) // ...
      << "testing events             : " << testingSizePerClass.at(cls) << Endl;
Log() << kDEBUG << "\t(sum of weights: " << testingSumWeightsPerClass.at(cls) << ")"
      << " - requested were " << eventCounts[cls].nTestingEventsRequested << " events" << Endl;

Log() // ...
      << setiosflags(ios::left) << std::setw(maxL) // ...
      << "training and testing events: "
      << (trainingSizePerClass.at(cls) + testingSizePerClass.at(cls)) << Endl;
Log() << kDEBUG << "\t(sum of weights: "
      << (trainingSumWeightsPerClass.at(cls) + testingSumWeightsPerClass.at(cls)) << ")" << Endl;

if (eventCounts[cls].nEvAfterCut < eventCounts[cls].nEvBeforeCut) {
   Log() << kINFO << Form("Dataset[%s] : ", dsi.GetName()) << setiosflags(ios::left) << std::setw(maxL) // ...
         << "due to the preselection a scaling factor has been applied to the numbers of requested events: "
         << eventCounts[cls].cutScaling() << Endl;
}

Log() << kINFO << Endl;