88 fDataSetManager (
NULL ),
90 fTransformations ( "
I" ),
92 fDataAssignType ( kAssignEvents ),
94 fMakeFoldDataSet ( kFALSE )
98 fLogger->SetSource(
"DataLoader");
107 std::vector<TMVA::VariableTransformBase*>::iterator trfIt =
fDefaultTrfs.begin();
108 for (;trfIt !=
fDefaultTrfs.end(); trfIt++)
delete (*trfIt);
137 if (dsi!=0)
return *dsi;
157 if (trafoDefinition.
Contains(
"(")) {
161 Ssiz_t parLen = trafoDefinition.
Index(
")", parStart )-parStart+1;
163 trName = trafoDefinition(0,parStart);
164 trOptions = trafoDefinition(parStart,parLen);
165 trOptions.
Remove(parLen-1,1);
169 trName = trafoDefinition;
173 if (trName ==
"VT") {
178 Log() << kFATAL <<
" VT transformation must be passed a floating threshold value" <<
Endl;
183 threshold = trOptions.
Atof();
186 return transformedLoader;
189 Log() << kFATAL <<
"Incorrect transformation string provided, please check" <<
Endl;
191 Log() << kINFO <<
"No transformation applied, returning original loader" <<
Endl;
214 for (
UInt_t ivar=0; ivar<vars.size(); ivar++) {
215 TString vname = vars[ivar].GetExpression();
219 for (
UInt_t itgt=0; itgt<tgts.size(); itgt++) {
220 TString vname = tgts[itgt].GetExpression();
224 for (
UInt_t ispc=0; ispc<spec.size(); ispc++) {
225 TString vname = spec[ispc].GetExpression();
226 assignTree->
Branch( vname, &
fATreeEvent[vars.size()+tgts.size()+ispc], vname +
"/F" );
284 const std::vector<Double_t>& event,
Double_t weight )
328 for(
UInt_t i=0; i<size; i++) {
349 Log() << kFATAL <<
"<AddTree> cannot interpret tree type: \"" << treetype
350 <<
"\" should be \"Training\" or \"Test\" or \"Training and Testing\"" <<
Endl;
352 AddTree( tree, className, weight, cut, tt );
361 Log() << kFATAL <<
"Tree does not exist (empty pointer)." <<
Endl;
369 Log() << kINFO<<
"Add Tree " << tree->
GetName() <<
" of type " << className
379 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
388 TTree* signalTree =
new TTree(
"TreeS",
"Tree (S)" );
391 Log() << kINFO <<
"Create TTree objects from ASCII input files ... \n- Signal file : \"" 395 AddTree( signalTree,
"Signal", weight,
TCut(
""), treetype );
402 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
410 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
419 TTree* bkgTree =
new TTree(
"TreeB",
"Tree (B)" );
422 Log() << kINFO <<
"Create TTree objects from ASCII input files ... \n- Background file : \"" 426 AddTree( bkgTree,
"Background", weight,
TCut(
""), treetype );
433 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
440 AddTree( tree,
"Signal", weight );
447 AddTree( tree,
"Background", weight );
540 for (std::vector<TString>::iterator it=theVariables->begin();
608 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.
Data()) );
622 Ntrain, Ntrain, Ntest, Ntest) );
647 AddCut( sigcut,
"Signal" );
648 AddCut( bkgcut,
"Background" );
662 Log() <<
kInfo <<
"Splitting in k-folds has been already done" <<
Endl;
672 std::vector<Event*> TrainSigData;
673 std::vector<Event*> TrainBkgData;
674 std::vector<Event*> TestSigData;
675 std::vector<Event*> TestBkgData;
678 for(
UInt_t i=0; i<TrainingData.size(); ++i){
679 if( strncmp(
DefaultDataSetInfo().GetClassInfo( TrainingData.at(i)->GetClass() )->
GetName(),
"Signal", 6) == 0){ TrainSigData.push_back(TrainingData.at(i)); }
680 else if( strncmp(
DefaultDataSetInfo().GetClassInfo( TrainingData.at(i)->GetClass() )->
GetName(),
"Background", 10) == 0){ TrainBkgData.push_back(TrainingData.at(i)); }
682 Log() << kFATAL <<
"DataSets should only contain Signal and Background classes for classification, " <<
DefaultDataSetInfo().
GetClassInfo( TrainingData.at(i)->GetClass() )->
GetName() <<
" is not a recognised class" <<
Endl;
686 for(
UInt_t i=0; i<TestingData.size(); ++i){
687 if( strncmp(
DefaultDataSetInfo().GetClassInfo( TestingData.at(i)->GetClass() )->
GetName(),
"Signal", 6) == 0){ TestSigData.push_back(TestingData.at(i)); }
688 else if( strncmp(
DefaultDataSetInfo().GetClassInfo( TestingData.at(i)->GetClass() )->
GetName(),
"Background", 10) == 0){ TestBkgData.push_back(TestingData.at(i)); }
690 Log() << kFATAL <<
"DataSets should only contain Signal and Background classes for classification, " <<
DefaultDataSetInfo().
GetClassInfo( TestingData.at(i)->GetClass() )->
GetName() <<
" is not a recognised class" <<
Endl;
697 std::vector<std::vector<Event*>> tempSigEvents =
SplitSets(TrainSigData,0,2);
698 std::vector<std::vector<Event*>> tempBkgEvents =
SplitSets(TrainBkgData,0,2);
720 std::vector<Event*>* tempTrain =
new std::vector<Event*>;
721 std::vector<Event*>* tempTest =
new std::vector<Event*>;
727 for(
UInt_t i=0; i<numFolds; ++i){
761 tempTrain->reserve(nTrain);
762 tempTest->reserve(nTest);
765 for(
UInt_t j=0; j<numFolds; ++j){
813 std::vector<std::vector<Event*>> tempSets;
814 tempSets.resize(numFolds);
822 if(inSet == foldSize*numFolds){
828 if(tempSets.at(s).size()<foldSize){
829 tempSets.at(s).push_back(oldSet.at(i));
858 des->
AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
863 des->
AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
874 "CorrelationMatrix"+className,
"Correlation Matrix ("+className+
")");
DataSetInfo * GetDataSetInfo(const TString &dsiName)
returns datasetinfo object for given name
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a background tree with the given weight and tree type
virtual const char * GetName() const
Returns name of object.
DataSetManager * fDataSetManager
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
MsgLogger & Endl(MsgLogger &ml)
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a target (can be a complex expression) to the set of targets used in the MV analysis
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add a training event for the class given by className
std::vector< VariableInfo > & GetSpectatorInfos()
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
void SetCut(const TCut &cut, const TString &className)
set the cut for the classes
std::vector< std::vector< TMVA::Event * > > fTrainBkgEvents
DataSetInfo & GetDataSetInfo()
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
DataSetInfo & DefaultDataSetInfo()
default creation
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
void ToLower()
Change string to lower-case.
void MakeKFoldDataSet(UInt_t numberFolds, bool validationSet=false)
Function required to split the training and testing datasets into a number of folds.
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void AddCut(const TCut &cut, const TString &className)
set the cut for the classes
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
DataInputHandler * fDataInputHandler
Types::EAnalysisType fAnalysisType
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background test event
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
std::vector< std::vector< TMVA::Event * > > fTestBkgEvents
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
VariableInfo & AddSpectator(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, char type='F', Bool_t normalized=kTRUE, void *external=0)
add a spectator (can be a complex expression) to the set of spectator variables used in the MV analysis
Class that contains all the information of a class.
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add a test event for the class given by className
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
virtual UInt_t Integer(UInt_t imax)
Returns a random integer on [ 0, imax-1 ].
void SetTree(TTree *tree, const TString &className, Double_t weight)
set the tree for the class given by className
void PrepareFoldDataSet(UInt_t foldNumber, Types::ETreeType tt)
Function for assigning the correct folds to the testing or training set.
Class that contains all the data information.
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
DataSetInfo & AddDataSet(DataSetInfo &)
std::vector< VariableInfo > & GetTargetInfos()
void AddCut(const TString &cut, const TString &className="")
A specialized string object used for TTree selections.
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
void SetSplitOptions(const TString &so)
DataInputHandler & DataInput()
Service class for 2-Dim histogram classes.
ClassInfo * GetClassInfo(Int_t clNum) const
const TMatrixD * CorrelationMatrix(const TString &className) const
void SetWeightExpression(const TString &exp, const TString &className="")
set the weight expression for the classes; if a class name is specified, set it only for that class (if the class name is unknown, a new class with this name is registered)
DataSetInfo & AddDataSetInfo(DataSetInfo &dsi)
stores a copy of the dataset info object
char * Form(const char *fmt,...)
std::vector< TTree * > fTestAssignTree
Bool_t UserAssignEvents(UInt_t clIndex)
std::vector< Float_t > fATreeEvent
std::vector< std::vector< TMVA::Event * > > fTestSigEvents
void PrintClasses() const
DataLoader * MakeCopy(TString name)
Copy method used in VI (variable importance) and CV (cross validation).
TString & Remove(Ssiz_t pos)
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add an event; the values in the vector "event" are ordered as: variables + targets + spectators ...
Class that contains all the data information.
void SetBackgroundWeightExpression(const TString &variable)
unsigned long long ULong64_t
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
static void DestroyInstance()
static function: destroy TMVA instance
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
void SetWeightExpression(const TString &variable, const TString &className="")
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background training event
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void SetEventCollection(std::vector< Event *> *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
ClassInfo * AddClass(const TString &className)
void SetSignalWeightExpression(const TString &variable)
virtual Long64_t GetEntries() const
std::vector< std::vector< TMVA::Event * > > fTrainSigEvents
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Abstract ClassifierFactory template that handles arbitrary types.
std::vector< TTree * > fTrainAssignTree
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
std::vector< std::vector< TMVA::Event * > > fValidBkgEvents
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
friend void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void SetSignalTree(TTree *signal, Double_t weight=1.0)
VariableInfo & AddVariable(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0, char varType='F', Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis ...
Double_t Atof() const
Return floating-point value contained in string.
A TTree object has a header with a name and a title.
std::vector< std::vector< TMVA::Event * > > SplitSets(std::vector< TMVA::Event *> &oldSet, int seedNum, int numFolds)
Splits the input vector into equally sized, randomly sampled folds.
std::vector< std::vector< TMVA::Event * > > fValidSigEvents
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a signal tree with the given weight and tree type
DataSet * GetDataSet() const
returns data set
std::vector< VariableInfo > & GetVariableInfos()
void SetCut(const TString &cut, const TString &className="")
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator in data set info
const char * Data() const