// Fragment of a member-initializer list; the constructor header and the other
// initializers are outside this excerpt.
// fDataSetInfo is obtained from the loader `dl`, and fEvents is the event
// collection of the data set that fDataSetInfo refers to.
51 fDataSetInfo(dl->GetDataSetInfo()),
53 fEvents (fDataSetInfo.GetDataSet()->GetEventCollection())
// Fragment of a variance-based variable selection routine. The function header and
// several interior lines (including the declarations of `threshold` and
// `transformedLoader`) are outside this excerpt.
// Visible behaviour: log the number of input variables, print a table of the
// variables whose stored variance is above `threshold`, register those variables on
// `transformedLoader`, prepare its training/test trees and return it.
87 const UInt_t nvars = fDataSetInfo.GetNVariables();
88 Log() << kINFO <<
"Number of variables before transformation: " << nvars <<
Endl;
89 std::vector<VariableInfo>& vars = fDataSetInfo.GetVariableInfos();
96 Log() << kINFO <<
"Selecting variables whose variance is above threshold value = " << threshold <<
Endl;
// maxL is used as the column width for the variable-name column of the table below.
97 Int_t maxL = fDataSetInfo.GetVariableNameMaxLength();
// Table header: a rule, the two column titles, and another rule.
99 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
100 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Selected Variables";
101 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
102 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
// One pass over all variables, reading the variance already stored on each VariableInfo.
103 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
104 Double_t variance = vars[ivar].GetVariance();
// NOTE(review): the line following this `if` is missing from the excerpt; presumably it
// opens a brace so that the two log statements and AddVariable below are all guarded
// by the threshold test - confirm against the full source.
105 if (variance > threshold)
107 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << vars[ivar].GetExpression();
108 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
// The variable is added to the new loader by expression and variable type.
109 transformedLoader->
AddVariable(vars[ivar].GetExpression(), vars[ivar].GetVarType());
// Presumably copies the remaining loader content from fDataLoader into
// transformedLoader (helper body is not fully visible here) - TODO confirm.
112 CopyDataLoader(transformedLoader,fDataLoader);
113 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
// Reuse the "Signal" and "Background" cuts and the split options of the original loader.
116 transformedLoader->
PrepareTrainingAndTestTree(fDataLoader->GetDataSetInfo().GetCut(
"Signal"), fDataLoader->GetDataSetInfo().GetCut(
"Background"), fDataLoader->GetDataSetInfo().GetSplitOptions());
119 return transformedLoader;
// Fragment of a min/max update helper; the header (which must supply `ivar` and `x`)
// and the branch conditions are outside this excerpt. Presumably this is the body of
// the UpdateNorm(ivar, x) helper called elsewhere in the file - TODO confirm.
// Visible behaviour: widen the stored [min, max] range of one variable or one target
// so that it includes the value `x`.
131 Int_t nvars = fDataSetInfo.GetNVariables();
132 std::vector<VariableInfo>& vars = fDataSetInfo.GetVariableInfos();
133 std::vector<VariableInfo>& tars = fDataSetInfo.GetTargetInfos();
// Variable case: `ivar` indexes `vars` directly.
135 if (
x < vars[ivar].GetMin()) vars[ivar].SetMin(
x);
136 if (
x > vars[ivar].GetMax()) vars[ivar].SetMax(
x);
// Target case: the index is shifted by nvars, i.e. targets are addressed as
// ivar - nvars. The condition that selects this branch is on a line not shown.
139 if (
x < tars[ivar-nvars].GetMin()) tars[ivar-nvars].SetMin(
x);
140 if (
x > tars[ivar-nvars].GetMax()) tars[ivar-nvars].SetMax(
x);
// Fragment of a statistics routine; the function header and a number of interior lines
// (among them the declarations of `x`, `weight`, `sumOfWeights`, `x2` and `v0`, and
// several closing braces) are outside this excerpt.
// Visible behaviour: over all events of the data set, compute for every variable and
// every target the min/max, the weighted mean, the RMS and the weighted variance,
// store them on the corresponding VariableInfo objects and log the results.
150 const std::vector<TMVA::Event*>& events = fDataSetInfo.GetDataSet()->GetEventCollection();
152 const UInt_t nvars = fDataSetInfo.GetNVariables();
153 const UInt_t ntgts = fDataSetInfo.GetNTargets();
154 std::vector<VariableInfo>& vars = fDataSetInfo.GetVariableInfos();
155 std::vector<VariableInfo>& tars = fDataSetInfo.GetTargetInfos();
157 UInt_t nevts = events.size();
// Accumulators are indexed [0, nvars) for variables and [nvars, nvars+ntgts) for targets.
// x0 collects the weighted sums of the values.
160 TVectorD x0( nvars+ntgts ); x0 *= 0;
// First pass over the events: sum of weights, min/max, and the weighted sums
// x0 (sum of x*weight) and x2 (sum of x*x*weight).
164 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
165 const Event* ev = events[ievt];
// `weight` is assigned on a line missing from this excerpt (presumably from `ev`).
168 sumOfWeights += weight;
169 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
// Both bounds are set to x here; the guarding condition is on a line not shown.
// Presumably this seeds min/max from the first event - TODO confirm.
172 vars[ivar].SetMin(
x);
173 vars[ivar].SetMax(
x);
176 UpdateNorm(ivar,
x );
178 x0(ivar) +=
x*weight;
179 x2(ivar) +=
x*
x*weight;
// Same accumulation for the targets, stored at offset nvars.
181 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
184 tars[itgt].SetMin(
x);
185 tars[itgt].SetMax(
x);
188 UpdateNorm( nvars+itgt,
x );
190 x0(nvars+itgt) +=
x*weight;
191 x2(nvars+itgt) +=
x*
x*weight;
// The sum of weights is used as a divisor below, so a non-positive value is reported
// at kFATAL level.
// NOTE(review): the check is `<= 0` but the message text says "== 0".
195 if (sumOfWeights <= 0) {
196 Log() << kFATAL <<
" the sum of event weights calculated for your input is == 0"
197 <<
" or exactly: " << sumOfWeights <<
" there is obviously some problem..."<<
Endl;
// Per variable: mean = x0/sumOfWeights, RMS = sqrt(x2/sumOfWeights - mean^2).
// A negative radicand is reported at kFATAL level before the square root is taken.
201 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
202 Double_t mean = x0(ivar)/sumOfWeights;
204 vars[ivar].SetMean( mean );
205 if (
x2(ivar)/sumOfWeights - mean*mean < 0) {
206 Log() << kFATAL <<
" the RMS of your input variable " << ivar
207 <<
" evaluates to an imaginary number: sqrt("<<
x2(ivar)/sumOfWeights - mean*mean
208 <<
") .. sometimes related to a problem with outliers and negative event weights"
211 vars[ivar].SetRMS(
TMath::Sqrt(
x2(ivar)/sumOfWeights - mean*mean) );
// Same mean / RMS computation for the targets.
213 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
214 Double_t mean = x0(nvars+itgt)/sumOfWeights;
215 tars[itgt].SetMean( mean );
216 if (
x2(nvars+itgt)/sumOfWeights - mean*mean < 0) {
217 Log() << kFATAL <<
" the RMS of your target variable " << itgt
218 <<
" evaluates to an imaginary number: sqrt(" <<
x2(nvars+itgt)/sumOfWeights - mean*mean
219 <<
") .. sometimes related to a problem with outliers and negative event weights"
222 tars[itgt].SetRMS(
TMath::Sqrt(
x2(nvars+itgt)/sumOfWeights - mean*mean) );
// Second pass over the events: v0 accumulates weight*(x-mean)^2 using the means
// stored above. `weight` and `x` are again assigned on lines not shown.
226 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
227 const Event* ev = events[ievt];
230 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
232 Double_t mean = vars[ivar].GetMean();
233 v0(ivar) += weight*(
x-mean)*(
x-mean);
236 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
238 Double_t mean = tars[itgt].GetMean();
239 v0(nvars+itgt) += weight*(
x-mean)*(
x-mean);
// Variance table for the variables; maxL is the width of the name column.
243 Int_t maxL = fDataSetInfo.GetVariableNameMaxLength();
245 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
246 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Variables";
247 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
248 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
251 Log() << std::setprecision(5);
// `variance` is declared on a line missing from this excerpt; by analogy with the
// target loop below it is presumably v0(ivar)/sumOfWeights - TODO confirm.
252 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
254 vars[ivar].SetVariance( variance );
255 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << vars[ivar].GetExpression();
// NOTE(review): the value column uses width maxL while its header uses width 10.
256 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
// Variance table for the targets; maxL is re-used with the target-name width.
259 maxL = fDataSetInfo.GetTargetNameMaxLength();
261 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
262 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Targets";
263 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
264 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
// Target variance = accumulated weighted squared deviation / sum of weights.
266 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
267 Double_t variance =
v0(nvars+itgt)/sumOfWeights;
268 tars[itgt].SetVariance( variance );
269 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << tars[itgt].GetExpression();
270 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
// Min/max listing, printed with precision 3; precision is set back to 5 at the end.
273 Log() << kINFO <<
"Set minNorm/maxNorm for variables to: " <<
Endl;
274 Log() << std::setprecision(3);
// Unlike the surrounding statements, no message level is streamed on these lines.
275 for (
UInt_t ivar=0; ivar<nvars; ivar++)
276 Log() <<
" " << vars[ivar].GetExpression()
277 <<
"\t: [" << vars[ivar].GetMin() <<
"\t, " << vars[ivar].GetMax() <<
"\t] " <<
Endl;
278 Log() << kINFO <<
"Set minNorm/maxNorm for targets to: " <<
Endl;
279 Log() << std::setprecision(3);
280 for (
UInt_t itgt=0; itgt<ntgts; itgt++)
281 Log() <<
" " << tars[itgt].GetExpression()
282 <<
"\t: [" << tars[itgt].GetMin() <<
"\t, " << tars[itgt].GetMax() <<
"\t] " <<
Endl;
283 Log() << std::setprecision(5);
// Fragment of a loader-copy helper; the header and the lines that define `des` and
// iterate `treeinfo` are outside this excerpt.
// Visible behaviour: a tree-info entry is re-registered on `des` with its tree, weight
// and tree type, via AddSignalTree in one place and AddBackgroundTree in another.
// Presumably each call sits in its own loop over the source loader's signal and
// background trees - TODO confirm.
291 des->
AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
296 des->
AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
static const double x2[5]
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a signal tree with the given weight and tree type
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
DataInputHandler & DataInput()
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a background tree with the given weight and tree type
DataSetInfo & GetDataSetInfo()
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
UInt_t GetNVariables() const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Float_t GetTarget(UInt_t itgt) const
ostringstream derivative to redirect and format output
MsgLogger & Endl(MsgLogger &ml)
Double_t Sqrt(Double_t x)