83 fDataSetManager (
NULL ),
85 fTransformations ( "
I" ),
87 fDataAssignType ( kAssignEvents ),
89 fMakeFoldDataSet ( kFALSE )
93 fLogger->SetSource(
"DataLoader");
102 std::vector<TMVA::VariableTransformBase*>::iterator trfIt =
fDefaultTrfs.begin();
103 for (;trfIt !=
fDefaultTrfs.end(); trfIt++)
delete (*trfIt);
130 if (dsi!=0)
return *dsi;
149 if (trafoDefinition.
Contains(
"(")) {
153 Ssiz_t parLen = trafoDefinition.
Index(
")", parStart )-parStart+1;
155 trName = trafoDefinition(0,parStart);
156 trOptions = trafoDefinition(parStart,parLen);
157 trOptions.
Remove(parLen-1,1);
161 trName = trafoDefinition;
165 if (trName ==
"VT") {
170 Log() <<
kFATAL <<
" VT transformation must be passed a floating threshold value" <<
Endl;
174 threshold = trOptions.
Atof();
176 return transformedLoader;
179 Log() <<
kFATAL <<
"Incorrect transformation string provided, please check" <<
Endl;
181 Log() <<
kINFO <<
"No transformation applied, returning original loader" <<
Endl;
203 for (
UInt_t ivar=0; ivar<vars.size(); ivar++) {
204 TString vname = vars[ivar].GetExpression();
208 for (
UInt_t itgt=0; itgt<tgts.size(); itgt++) {
209 TString vname = tgts[itgt].GetExpression();
213 for (
UInt_t ispc=0; ispc<spec.size(); ispc++) {
214 TString vname = spec[ispc].GetExpression();
215 assignTree->
Branch( vname, &
fATreeEvent[vars.size()+tgts.size()+ispc], vname +
"/F" );
264 const std::vector<Double_t>& event,
Double_t weight )
308 for(
UInt_t i=0; i<size; i++) {
328 Log() <<
kFATAL <<
"<AddTree> cannot interpret tree type: \"" << treetype
329 <<
"\" should be \"Training\" or \"Test\" or \"Training and Testing\"" <<
Endl;
331 AddTree( tree, className, weight, cut, tt );
339 Log() <<
kFATAL <<
"Tree does not exist (empty pointer)." <<
Endl;
347 Log() <<
kINFO<<
"Add Tree " << tree->
GetName() <<
" of type " << className
356 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
365 TTree* signalTree =
new TTree(
"TreeS",
"Tree (S)" );
368 Log() <<
kINFO <<
"Create TTree objects from ASCII input files ... \n- Signal file : \"" 372 AddTree( signalTree,
"Signal", weight,
TCut(
""), treetype );
378 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
385 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
393 TTree* bkgTree =
new TTree(
"TreeB",
"Tree (B)" );
396 Log() <<
kINFO <<
"Create TTree objects from ASCII input files ... \n- Background file : \"" 400 AddTree( bkgTree,
"Background", weight,
TCut(
""), treetype );
406 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
412 AddTree( tree,
"Signal", weight );
418 AddTree( tree,
"Background", weight );
502 for (std::vector<TString>::iterator it=theVariables->begin();
563 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.
Data()) );
576 Ntrain, Ntrain, Ntest, Ntest) );
600 AddCut( sigcut,
"Signal" );
601 AddCut( bkgcut,
"Background" );
616 Log() <<
kInfo <<
"Splitting in k-folds has been already done" <<
Endl;
626 std::vector<Event*> TrainSigData;
627 std::vector<Event*> TrainBkgData;
628 std::vector<Event*> TestSigData;
629 std::vector<Event*> TestBkgData;
632 for(
UInt_t i=0; i<TrainingData.size(); ++i){
633 if( strncmp(
DefaultDataSetInfo().GetClassInfo( TrainingData.at(i)->GetClass() )->
GetName(),
"Signal", 6) == 0){ TrainSigData.push_back(TrainingData.at(i)); }
634 else if( strncmp(
DefaultDataSetInfo().GetClassInfo( TrainingData.at(i)->GetClass() )->
GetName(),
"Background", 10) == 0){ TrainBkgData.push_back(TrainingData.at(i)); }
640 for(
UInt_t i=0; i<TestingData.size(); ++i){
641 if( strncmp(
DefaultDataSetInfo().GetClassInfo( TestingData.at(i)->GetClass() )->
GetName(),
"Signal", 6) == 0){ TestSigData.push_back(TestingData.at(i)); }
642 else if( strncmp(
DefaultDataSetInfo().GetClassInfo( TestingData.at(i)->GetClass() )->
GetName(),
"Background", 10) == 0){ TestBkgData.push_back(TestingData.at(i)); }
651 std::vector<std::vector<Event*>> tempSigEvents =
SplitSets(TrainSigData,0,2);
652 std::vector<std::vector<Event*>> tempBkgEvents =
SplitSets(TrainBkgData,0,2);
673 std::vector<Event*>* tempTrain =
new std::vector<Event*>;
674 std::vector<Event*>* tempTest =
new std::vector<Event*>;
680 for(
UInt_t i=0; i<numFolds; ++i){
714 tempTrain->reserve(nTrain);
715 tempTest->reserve(nTest);
718 for(
UInt_t j=0; j<numFolds; ++j){
764 std::vector<std::vector<Event*>> tempSets;
765 tempSets.resize(numFolds);
773 if(inSet == foldSize*numFolds){
779 if(tempSets.at(s).size()<foldSize){
780 tempSets.at(s).push_back(oldSet.at(i));
807 des->
AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
812 des->
AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
822 "CorrelationMatrix"+className,
"Correlation Matrix ("+className+
")");
DataSetInfo * GetDataSetInfo(const TString &dsiName)
Returns the DataSetInfo object for the given name.
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
virtual const char * GetName() const
Returns name of object.
DataSetManager * fDataSetManager
Random number generator class based on the Mersenne Twister generator of M. Matsumoto and T. Nishimura.
MsgLogger & Endl(MsgLogger &ml)
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a target (can be a complex expression) to the set of targets used in the MV analysis ...
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
std::vector< VariableInfo > & GetSpectatorInfos()
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
void SetCut(const TCut &cut, const TString &className)
set the cut for the classes
std::vector< std::vector< TMVA::Event * > > fTrainBkgEvents
DataSetInfo & GetDataSetInfo()
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
TTree * CreateEventAssignTrees(const TString &name)
DataSetInfo & DefaultDataSetInfo()
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
void ToLower()
Change string to lower-case.
void MakeKFoldDataSet(UInt_t numberFolds, bool validationSet=false)
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void AddCut(const TCut &cut, const TString &className)
set the cut for the classes
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
DataInputHandler * fDataInputHandler
Types::EAnalysisType fAnalysisType
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
TH2 * GetCorrelationMatrix(const TString &className)
std::vector< std::vector< TMVA::Event * > > fTestBkgEvents
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
VariableInfo & AddSpectator(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, char type='F', Bool_t normalized=kTRUE, void *external=0)
add a spectator (can be a complex expression) to the set of spectator variables used in the MV analysis ...
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
virtual UInt_t Integer(UInt_t imax)
Returns a random integer on [ 0, imax-1 ].
void SetTree(TTree *tree, const TString &className, Double_t weight)
void PrepareFoldDataSet(UInt_t foldNumber, Types::ETreeType tt)
void SetInputVariables(std::vector< TString > *theVariables)
DataSetInfo & AddDataSet(DataSetInfo &)
std::vector< VariableInfo > & GetTargetInfos()
void AddCut(const TString &cut, const TString &className="")
A specialized string object used for TTree selections.
void SetInputTreesFromEventAssignTrees()
void SetSplitOptions(const TString &so)
DataInputHandler & DataInput()
Service class for 2-Dim histogram classes.
ClassInfo * GetClassInfo(Int_t clNum) const
const TMatrixD * CorrelationMatrix(const TString &className) const
void SetWeightExpression(const TString &exp, const TString &className="")
set the weight expressions for the classes: if a class name is specified, set it only for this class; if the class name is unknown, register a new class with this name
DataSetInfo & AddDataSetInfo(DataSetInfo &dsi)
stores a copy of the dataset info object
char * Form(const char *fmt,...)
std::vector< TTree * > fTestAssignTree
Bool_t UserAssignEvents(UInt_t clIndex)
std::vector< Float_t > fATreeEvent
std::vector< std::vector< TMVA::Event * > > fTestSigEvents
void PrintClasses() const
DataLoader * MakeCopy(TString name)
TString & Remove(Ssiz_t pos)
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
void SetBackgroundWeightExpression(const TString &variable)
unsigned long long ULong64_t
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
static void DestroyInstance()
static function: destroy TMVA instance
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
void SetWeightExpression(const TString &variable, const TString &className="")
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void SetEventCollection(std::vector< Event *> *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
ClassInfo * AddClass(const TString &className)
void SetSignalWeightExpression(const TString &variable)
virtual Long64_t GetEntries() const
std::vector< std::vector< TMVA::Event * > > fTrainSigEvents
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Abstract ClassifierFactory template that handles arbitrary types.
std::vector< TTree * > fTrainAssignTree
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
std::vector< std::vector< TMVA::Event * > > fValidBkgEvents
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
friend void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void SetSignalTree(TTree *signal, Double_t weight=1.0)
VariableInfo & AddVariable(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0, char varType='F', Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis ...
Double_t Atof() const
Return floating-point value contained in string.
A TTree object has a header with a name and a title.
std::vector< std::vector< TMVA::Event * > > SplitSets(std::vector< TMVA::Event *> &oldSet, int seedNum, int numFolds)
std::vector< std::vector< TMVA::Event * > > fValidSigEvents
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
DataSet * GetDataSet() const
returns data set
std::vector< VariableInfo > & GetVariableInfos()
void SetCut(const TString &cut, const TString &className="")
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
const char * Data() const