58 fDataSetInfo(dl->GetDataSetInfo()),
60 fEvents (fDataSetInfo.
GetDataSet()->GetEventCollection())
95 Log() << kINFO <<
"Number of variables before transformation: " << nvars <<
Endl;
103 Log() << kINFO <<
"Selecting variables whose variance is above threshold value = " << threshold <<
Endl;
106 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
107 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Selected Variables";
108 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
109 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
110 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
111 Double_t variance = vars[ivar].GetVariance();
112 if (variance > threshold)
114 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << vars[ivar].GetExpression();
115 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
116 transformedLoader->
AddVariable(vars[ivar].GetExpression(), vars[ivar].GetVarType());
120 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
126 return transformedLoader;
142 if (x < vars[ivar].GetMin()) vars[ivar].SetMin(x);
143 if (x > vars[ivar].GetMax()) vars[ivar].SetMax(x);
146 if (x < tars[ivar-nvars].GetMin()) tars[ivar-nvars].SetMin(x);
147 if (x > tars[ivar-nvars].GetMax()) tars[ivar-nvars].SetMax(x);
164 UInt_t nevts = events.size();
167 TVectorD x0( nvars+ntgts ); x0 *= 0;
168 TVectorD v0( nvars+ntgts ); v0 *= 0;
171 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
172 const Event* ev = events[ievt];
175 sumOfWeights += weight;
176 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
179 vars[ivar].SetMin(x);
180 vars[ivar].SetMax(x);
185 x0(ivar) += x*weight;
186 x2(ivar) += x*x*weight;
188 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
191 tars[itgt].SetMin(x);
192 tars[itgt].SetMax(x);
197 x0(nvars+itgt) += x*weight;
198 x2(nvars+itgt) += x*x*weight;
202 if (sumOfWeights <= 0) {
203 Log() << kFATAL <<
" the sum of event weights calculated for your input is == 0" 204 <<
" or exactly: " << sumOfWeights <<
" there is obviously some problem..."<<
Endl;
208 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
209 Double_t mean = x0(ivar)/sumOfWeights;
211 vars[ivar].SetMean( mean );
212 if (
x2(ivar)/sumOfWeights - mean*mean < 0) {
213 Log() << kFATAL <<
" the RMS of your input variable " << ivar
214 <<
" evaluates to an imaginary number: sqrt("<<
x2(ivar)/sumOfWeights - mean*mean
215 <<
") .. sometimes related to a problem with outliers and negative event weights" 218 vars[ivar].SetRMS(
TMath::Sqrt(
x2(ivar)/sumOfWeights - mean*mean) );
220 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
221 Double_t mean = x0(nvars+itgt)/sumOfWeights;
222 tars[itgt].SetMean( mean );
223 if (
x2(nvars+itgt)/sumOfWeights - mean*mean < 0) {
224 Log() << kFATAL <<
" the RMS of your target variable " << itgt
225 <<
" evaluates to an imaginary number: sqrt(" <<
x2(nvars+itgt)/sumOfWeights - mean*mean
226 <<
") .. sometimes related to a problem with outliers and negative event weights" 229 tars[itgt].SetRMS(
TMath::Sqrt(
x2(nvars+itgt)/sumOfWeights - mean*mean) );
233 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
234 const Event* ev = events[ievt];
237 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
239 Double_t mean = vars[ivar].GetMean();
240 v0(ivar) += weight*(x-mean)*(x-mean);
243 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
245 Double_t mean = tars[itgt].GetMean();
246 v0(nvars+itgt) += weight*(x-mean)*(x-mean);
252 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
253 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Variables";
254 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
255 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
258 Log() << std::setprecision(5);
259 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
260 Double_t variance = v0(ivar)/sumOfWeights;
261 vars[ivar].SetVariance( variance );
262 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << vars[ivar].GetExpression();
263 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
268 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
269 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Targets";
270 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
271 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
273 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
274 Double_t variance = v0(nvars+itgt)/sumOfWeights;
275 tars[itgt].SetVariance( variance );
276 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << tars[itgt].GetExpression();
277 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
280 Log() << kINFO <<
"Set minNorm/maxNorm for variables to: " <<
Endl;
281 Log() << std::setprecision(3);
282 for (
UInt_t ivar=0; ivar<nvars; ivar++)
283 Log() <<
" " << vars[ivar].GetExpression()
284 <<
"\t: [" << vars[ivar].GetMin() <<
"\t, " << vars[ivar].GetMax() <<
"\t] " <<
Endl;
285 Log() << kINFO <<
"Set minNorm/maxNorm for targets to: " <<
Endl;
286 Log() << std::setprecision(3);
287 for (
UInt_t itgt=0; itgt<ntgts; itgt++)
288 Log() <<
" " << tars[itgt].GetExpression()
289 <<
"\t: [" << tars[itgt].GetMin() <<
"\t, " << tars[itgt].GetMax() <<
"\t] " <<
Endl;
290 Log() << std::setprecision(5);
298 des->
AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
303 des->
AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
UInt_t GetNVariables() const
MsgLogger & Endl(MsgLogger &ml)
TFileCollection * GetDataSet(const char *ds, const char *server)
GetDataSet wrapper.
DataSetInfo & GetDataSetInfo()
Int_t GetTargetNameMaxLength() const
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
static const double x2[5]
std::vector< std::vector< double > > Data
Double_t GetWeight() const
return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not...
std::vector< VariableInfo > & GetTargetInfos()
Float_t GetTarget(UInt_t itgt) const
UInt_t GetNTargets() const
DataInputHandler & DataInput()
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Int_t GetVariableNameMaxLength() const
ostringstream derivative to redirect and format output
const TString & GetSplitOptions() const
const TCut & GetCut(Int_t i) const
Double_t Sqrt(Double_t x)
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
DataSet * GetDataSet() const
returns data set
std::vector< VariableInfo > & GetVariableInfos()