58 :
TNamed(dsi.GetName(),
"DataSet"),
63 fHasNegativeEventWeights(
kFALSE),
80 for (
Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
95 fHasNegativeEventWeights(
kFALSE),
112 for (
Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
129 fBlockBelongToTraining.clear();
131 for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); ++it) {
132 for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); ++itMap) {
133 delete itMap->second;
138 if (fSamplingRandom != 0 )
delete fSamplingRandom;
152 if (fClassEvents.size()<(
UInt_t)(
type+1)) fClassEvents.resize(
type+1 );
153 if (fClassEvents.at(
type ).size() < classNumber+1) fClassEvents.at(
type ).resize( classNumber+1 );
154 fClassEvents.at(
type ).at( classNumber ) += 1;
161 if (fClassEvents.size()<(
UInt_t)(
type+1)) fClassEvents.resize(
type+1 );
162 fClassEvents.at(
type ).clear();
170 return fClassEvents.at(
type).at(classNumber);
172 catch (std::out_of_range &) {
173 ClassInfo* ci = fdsi->GetClassInfo( classNumber );
174 Log() << kFATAL <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"No " << (
type==0?
"training":(
type==1?
"testing":
"_unknown_type_"))
175 <<
" events for class " << (ci==NULL?
"_no_name_known_":ci->
GetName()) <<
" (index # "<<classNumber<<
")"
176 <<
" available. Check if all class names are spelled correctly and if events are"
177 <<
" passing the selection cuts." <<
Endl;
180 Log() << kFATAL <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" <<
Endl;
191 if (i>=fEventCollection.size() || fEventCollection[i].size()==0)
return;
194 for (
UInt_t j=0; j<fEventCollection[i].size(); j++)
delete fEventCollection[i][j];
196 fEventCollection[i].clear();
203 if (fSampling.size() >
UInt_t(fCurrentTreeIdx) && fSampling.at(fCurrentTreeIdx)) {
204 Long64_t iEvt = fSamplingSelected.at(fCurrentTreeIdx).at( fCurrentEventIdx ).second;
205 return ((fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
208 return ((fEventCollection.at(fCurrentTreeIdx))).at(fCurrentEventIdx);
217 return fdsi->GetNVariables();
225 return fdsi->GetNTargets();
233 return fdsi->GetNSpectators();
242 fEventCollection.at(
Int_t(
type)).push_back(ev);
251 DestroyCollection(
type,deleteEvents);
254 ClearNClassEvents(
type );
256 fEventCollection.at(t) = *events;
257 for (std::vector<Event*>::iterator it = fEventCollection.at(t).begin(); it < fEventCollection.at(t).end(); ++it) {
258 IncrementNClassEvents( t, (*it)->GetClass() );
269 if (t<fResults.size()) {
270 const std::map< TString, Results* >& resultsForType = fResults[t];
271 std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
272 if (it!=resultsForType.end()) {
278 fResults.resize(t+1);
284 switch(analysistype) {
304 fResults[t][resultsName] = newresults;
319 if (fResults.empty())
return;
322 Log()<<kFATAL<<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"you asked for an Treetype (training/testing/...)"
323 <<
" whose index " <<
type <<
" does not exist " <<
Endl;
325 std::map< TString, Results* >& resultsForType = fResults[
UInt_t(
type)];
326 std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
327 if (it!=resultsForType.end()) {
328 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
" Delete Results previous existing result:" << resultsName
331 resultsForType.erase(it->first);
334 Log() << kINFO <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"could not fine Result class of " << resultsName
335 <<
" of type " <<
type <<
" which I should have deleted" <<
Endl;
345 if (fResults.empty())
return;
348 Log()<<kFATAL<<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"you asked for an Treetype (training/testing/...)"
349 <<
" whose index " <<
type <<
" does not exist " <<
Endl;
352 std::map<TString, Results *> & resultsForType = fResults[
UInt_t(
type)];
354 for (
auto && it : resultsForType) {
355 auto & resultsName = it.first;
357 Log() << kDEBUG <<
Form(
"Dataset[%s] : ", fdsi->GetName())
358 <<
" DeleteAllResults previous existing result: "
359 << resultsName <<
" of type " <<
type <<
Endl;
364 resultsForType.clear();
374 if (fBlockBelongToTraining.size() == blockNum)
return;
376 if (fBlockBelongToTraining.size() == 1) {
377 if (fEventCollection[tOrg].size() == 0)
378 fEventCollection[tOrg].resize(fEventCollection[tTrn].size());
379 fEventCollection[tOrg].clear();
380 for (
UInt_t i=0; i<fEventCollection[tTrn].size(); i++)
381 fEventCollection[tOrg].push_back(fEventCollection[tTrn][i]);
382 fClassEvents[tOrg] = fClassEvents[tTrn];
385 fBlockBelongToTraining.clear();
386 for (
UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(
kTRUE);
388 ApplyTrainingSetDivision();
397 fEventCollection[tTrn].clear();
398 if (fEventCollection[tVld].size()==0)
399 fEventCollection[tVld].resize(fEventCollection[tOrg].size());
400 fEventCollection[tVld].clear();
403 for (
UInt_t i=0; i<fEventCollection[tOrg].size(); i++) {
404 if (fBlockBelongToTraining[i % fBlockBelongToTraining.size()])
405 fEventCollection[tTrn].push_back(fEventCollection[tOrg][i]);
407 fEventCollection[tVld].push_back(fEventCollection[tOrg][i]);
417 fBlockBelongToTraining[blockInd]=
kFALSE;
419 fBlockBelongToTraining[blockInd]=
kTRUE;
420 if (applyChanges) ApplyTrainingSetDivision();
428 return GetNClassEvents(
Types::kTesting, fdsi->GetClassInfo(
"Signal")->GetNumber() );
436 return GetNClassEvents(
Types::kTesting, fdsi->GetClassInfo(
"Background")->GetNumber() );
444 return GetNClassEvents(
Types::kTraining, fdsi->GetClassInfo(
"Signal")->GetNumber() );
452 return GetNClassEvents(
Types::kTraining, fdsi->GetClassInfo(
"Background")->GetNumber() );
461 if (fSamplingRandom == 0 ) fSamplingRandom =
new TRandom3( seed );
464 std::vector< std::pair< Float_t, Long64_t >* > evtList;
466 Int_t treeIdx = TreeIndex( GetCurrentType() );
468 if (fSamplingEventList.size() <
UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
469 if (fSamplingSelected.size() <
UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
471 fSamplingEventList.at(treeIdx).clear();
472 fSamplingSelected.at(treeIdx).clear();
474 if (fSampling.size() <
UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
475 if (fSamplingNEvents.size() <
UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
476 if (fSamplingWeight.size() <
UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
478 if (fraction > 0.999999 || fraction < 0.0000001) {
479 fSampling.at( treeIdx ) =
false;
480 fSamplingNEvents.at( treeIdx ) = 0;
481 fSamplingWeight.at( treeIdx ) = 1.0;
486 fSampling.at( treeIdx ) =
false;
488 fSamplingNEvents.at( treeIdx ) =
Int_t(fraction*GetNEvents());
489 fSamplingWeight.at( treeIdx ) = weight;
492 fSamplingEventList.at( treeIdx ).reserve( nEvts );
493 fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
494 for (
Long64_t ievt=0; ievt<nEvts; ievt++) {
495 std::pair<Float_t,Long64_t> p(1.0,ievt);
496 fSamplingEventList.at( treeIdx ).push_back( p );
500 fSampling.at( treeIdx ) =
true;
509 Int_t treeIdx = TreeIndex( GetCurrentType() );
511 if (!fSampling.at(treeIdx) )
return;
513 if (fSamplingRandom == 0 )
514 Log() << kFATAL<<
Form(
"Dataset[%s] : ",fdsi->GetName())
515 <<
"no random generator present for creating a random/importance sampling (initialized?)" <<
Endl;
518 fSamplingSelected.at(treeIdx).clear();
521 std::vector< std::pair< Float_t, Long64_t > > evtList;
522 std::vector< std::pair< Float_t, Long64_t > >::iterator evtListIt;
528 evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
531 for (evtListIt = evtList.begin(); evtListIt != evtList.end(); ++evtListIt) {
532 sumWeights += (*evtListIt).first;
534 evtListIt = evtList.begin();
537 std::vector< Float_t > rnds;
538 rnds.reserve(fSamplingNEvents.at(treeIdx));
541 for (
Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
542 pos = fSamplingRandom->Rndm()*sumWeights;
543 rnds.push_back( pos );
547 std::sort(rnds.begin(),rnds.end());
550 std::vector< Float_t >::iterator rndsIt = rnds.begin();
551 Float_t runningSum = 0.000000001;
552 for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
553 runningSum += (*evtListIt).first;
554 if (runningSum >= (*rndsIt)) {
555 fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
556 evtListIt = evtList.erase( evtListIt );
559 if (rndsIt == rnds.end() )
break;
574 if (!fSampling.at(fCurrentTreeIdx))
return;
575 if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999)
return;
578 Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
579 if (evtNumber >= 0) {
583 for (
Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
584 if (
Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
585 Log() << kWARNING <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"event number (" << iEvt
586 <<
") larger than number of sampled events ("
587 << fSamplingEventList.at(fCurrentTreeIdx).size() <<
" of tree " << fCurrentTreeIdx <<
")" <<
Endl;
590 Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first;
593 weight /= fSamplingWeight.at(fCurrentTreeIdx);
594 if (weight > 1.0 ) weight = 1.0;
598 weight *= fSamplingWeight.at(fCurrentTreeIdx);
600 fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first = weight;
618 SetCurrentType(
type);
620 if (fResults.size() <= t) {
621 Log() << kWARNING <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"No results for treetype " << (
type==
Types::kTraining ?
"training" :
"testing" )
622 <<
" found. Size=" << fResults.size() <<
Endl;
642 for(
UInt_t i=0; i<fResults.at(t).size(); i++ )
643 metVals[i] =
new Float_t[fdsi->GetNTargets()+fdsi->GetNClasses()];
646 tree->Branch(
"classID", &cls,
"classID/I" );
647 tree->Branch(
"className", className,
"className/C" );
651 Int_t ivar_array = 0;
652 Int_t arraySize = -1;
653 for (std::vector<VariableInfo>::const_iterator itVars = fdsi->GetVariableInfos().begin();
654 itVars != fdsi->GetVariableInfos().end(); ++itVars) {
658 tree->Branch( (*itVars).GetInternalName(), &varVals[
n], (*itVars).GetInternalName()+
TString(
"/F") );
661 if (ivar_array == 0) {
663 name.ReplaceAll(
"[0]",
"");
664 arraySize = fdsi->GetVarArraySize((*itVars).GetExpression());
666 Log() << kDEBUG <<
"creating branch for array " <<
name <<
" with size " << arraySize <<
Endl;
669 if (ivar_array == arraySize)
676 for (std::vector<VariableInfo>::const_iterator itTgts = fdsi->GetTargetInfos().begin();
677 itTgts != fdsi->GetTargetInfos().end(); ++itTgts) {
679 tree->Branch( (*itTgts).GetInternalName(), &tgtVals[
n], (*itTgts).GetInternalName()+
TString(
"/F") );
684 for (std::vector<VariableInfo>::const_iterator itVis = fdsi->GetSpectatorInfos().begin();
685 itVis != fdsi->GetSpectatorInfos().end(); ++itVis) {
687 tree->Branch( (*itVis).GetInternalName(), &visVals[
n], (*itVis).GetInternalName()+
TString(
"/F") );
691 tree->Branch(
"weight", &weight,
"weight/F" );
695 for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
696 itMethod != fResults.at(t).end(); ++itMethod) {
699 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"analysis type: " << (itMethod->second->GetAnalysisType()==
Types::kRegression ?
"Regression" :
704 tree->Branch( itMethod->first, &(metVals[
n][0]), itMethod->first +
"/F" );
709 for (
UInt_t iCls = 0; iCls < fdsi->GetNClasses(); iCls++) {
710 if (iCls > 0) leafList.
Append(
":" );
711 leafList.
Append( fdsi->GetClassInfo( iCls )->GetName() );
714 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"itMethod->first " << itMethod->first <<
" LEAFLIST: "
715 << leafList <<
" itMethod->second " << itMethod->second <<
Endl;
716 tree->Branch( itMethod->first, (metVals[
n]), leafList );
721 for (
UInt_t iTgt = 0; iTgt < fdsi->GetNTargets(); iTgt++) {
722 if (iTgt > 0) leafList.
Append(
":" );
723 leafList.
Append( fdsi->GetTargetInfo( iTgt ).GetInternalName() );
727 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"itMethod->first " << itMethod->first <<
" LEAFLIST: "
728 << leafList <<
" itMethod->second " << itMethod->second <<
Endl;
729 tree->Branch( itMethod->first, (metVals[
n]), leafList );
732 Log() << kWARNING <<
Form(
"Dataset[%s] : ",fdsi->GetName()) <<
"Unknown analysis type for result found when writing TestTree." <<
Endl;
739 for (
auto && itMethod : fResults.at(t)) {
740 auto numEvents = GetNEvents(
type);
741 auto results = itMethod.second;
742 auto resultsName = itMethod.first;
745 auto analysisType = results->GetAnalysisType();
753 Log() << kFATAL <<
"Unexpected analysisType." <<
Endl;
756 if (numEventsResults != numEvents) {
757 Log() << kFATAL <<
"An error occurred in DataSet::GetTree. "
758 "Inconsistent size of result for result with name '"
759 << resultsName <<
"'."
760 <<
" Size is '" << std::to_string(numEventsResults)
762 <<
" Expected '" << numEvents <<
"'." <<
Endl;
767 for (
Long64_t iEvt = 0; iEvt < GetNEvents(
type ); iEvt++) {
769 const Event* ev = GetEvent( iEvt );
773 strlcpy(className, fdsi->GetClassInfo( cls )->GetName(),
sizeof(className));
783 for (
auto && itMethod : fResults.at(t)) {
784 auto & results = *itMethod.second;
785 auto analysisType = results.GetAnalysisType();
787 auto const & vals = results[iEvt];
790 metVals[iMethod][0] = vals[0];
792 for (
UInt_t nCls = 0; nCls < fdsi->GetNClasses(); nCls++) {
794 metVals[iMethod][nCls] = val;
797 for (
UInt_t nTgts = 0; nTgts < fdsi->GetNTargets(); nTgts++) {
799 metVals[iMethod][nTgts] = val;
809 <<
"Created tree '" <<
tree->GetName() <<
"' with " <<
tree->GetEntries() <<
" events" <<
Endl <<
Endl;
811 SetCurrentType(savedType);
817 for(
UInt_t i=0; i<fResults.at(t).size(); i++ )
char * Form(const char *fmt,...)
Class that contains all the information of a class.
Class that contains all the data information.
void DivideTrainingSet(UInt_t blockNum)
divide training set
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
std::vector< Char_t > fSampling
std::vector< Float_t > fSamplingWeight
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
void ClearNClassEvents(Int_t type)
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
TTree * GetTree(Types::ETreeType type)
create the test/training tree with all the variables, the weights, the classes, the targets,...
const Event * GetEvent() const
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
std::vector< Char_t > fBlockBelongToTraining
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
std::vector< Int_t > fSamplingNEvents
virtual ~DataSet()
destructor
std::vector< std::vector< Long64_t > > fClassEvents
void DeleteAllResults(Types::ETreeType type, Types::EAnalysisType analysistype)
Deletes all results currently in the dataset.
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
void CreateSampling() const
create an event sampling (random or importance sampling)
TRandom3 * fSamplingRandom
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
void ApplyTrainingSetDivision()
apply division of data set
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
UInt_t GetNSpectators() const
accessor to the number of spectators
UInt_t GetNVariables() const
accessor to the number of variables
UInt_t GetNTargets() const
accessor to the number of targets
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Float_t GetTarget(UInt_t itgt) const
ostringstream derivative to redirect and format output
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
void SetTreeType(Types::ETreeType type)
The TNamed class is the base class for all named ROOT classes.
virtual const char * GetName() const
Returns name of object.
Random number generator class based on M.
TString & Append(const char *cs)
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
A TTree represents a columnar dataset.
MsgLogger & Endl(MsgLogger &ml)
#define dest(otri, vertexptr)