73 fBackTransformedEvent(0),
74 fVariableTransform(tf),
78 fTransformName(trfName),
79 fVariableTypesAreCounted(false),
114 TString inputVariables = _inputVariables;
123 typedef std::set<Int_t> SelectedIndices;
125 SelectedIndices varIndices;
126 SelectedIndices tgtIndices;
127 SelectedIndices spctIndices;
129 if (inputVariables ==
"")
131 inputVariables =
"_V_,_T_";
146 if( variables.
Length() == 0 ){
147 for(
UInt_t ivar = 0; ivar < nvars; ++ivar ) {
148 fGet.push_back( std::pair<Char_t,UInt_t>(
'v',ivar) );
149 varIndices.insert( ivar );
154 Log() << kFATAL <<
"You selected variable with index : " << idx <<
" of only " << nvars <<
" variables." <<
Endl;
155 fGet.push_back( std::pair<Char_t,UInt_t>(
'v',idx) );
156 varIndices.insert( idx );
160 if( variables.
Length() == 0 ){
161 for(
UInt_t itgt = 0; itgt < ntgts; ++itgt ) {
162 fGet.push_back( std::pair<Char_t,UInt_t>(
't',itgt) );
163 tgtIndices.insert( itgt );
168 Log() << kFATAL <<
"You selected target with index : " << idx <<
" of only " << ntgts <<
" targets." <<
Endl;
169 fGet.push_back( std::pair<Char_t,UInt_t>(
't',idx) );
170 tgtIndices.insert( idx );
174 if( variables.
Length() == 0 ){
175 for(
UInt_t ispct = 0; ispct < nspcts; ++ispct ) {
176 fGet.push_back( std::pair<Char_t,UInt_t>(
's',ispct) );
177 spctIndices.insert( ispct );
182 Log() << kFATAL <<
"You selected spectator with index : " << idx <<
" of only " << nspcts <<
" spectators." <<
Endl;
183 fGet.push_back( std::pair<Char_t,UInt_t>(
's',idx) );
184 spctIndices.insert( idx );
186 }
else if(
TString(
"REARRANGE").BeginsWith(variables) ) {
189 Log() << kINFO <<
"Variable rearrangement set true: Variable order given in transformation option is used for input to transformation!" <<
Endl;
193 Int_t numIndices = varIndices.size()+tgtIndices.size()+spctIndices.size();
194 for(
UInt_t ivar = 0; ivar < nvars; ++ivar ) {
196 fGet.push_back( std::pair<Char_t,UInt_t>(
'v',ivar) );
197 varIndices.insert( ivar );
201 for(
UInt_t itgt = 0; itgt < ntgts; ++itgt ) {
203 fGet.push_back( std::pair<Char_t,UInt_t>(
't',itgt) );
204 tgtIndices.insert( itgt );
208 for(
UInt_t ispct = 0; ispct < nspcts; ++ispct ) {
210 fGet.push_back( std::pair<Char_t,UInt_t>(
's',ispct) );
211 spctIndices.insert( ispct );
215 Int_t numIndicesEndOfLoop = varIndices.size()+tgtIndices.size()+spctIndices.size();
216 if( numIndicesEndOfLoop == numIndices )
217 Log() << kWARNING <<
"Error at parsing the options for the variable transformations: Variable/Target/Spectator '" << variables.
Data() <<
"' not found." <<
Endl;
218 numIndices = numIndicesEndOfLoop;
223 if( putIntoVariables ) {
225 for( SelectedIndices::iterator it = varIndices.begin(), itEnd = varIndices.end(); it != itEnd; ++it ) {
226 fPut.push_back( std::pair<Char_t,UInt_t>(
'v',idx) );
229 for( SelectedIndices::iterator it = tgtIndices.begin(), itEnd = tgtIndices.end(); it != itEnd; ++it ) {
230 fPut.push_back( std::pair<Char_t,UInt_t>(
't',idx) );
233 for( SelectedIndices::iterator it = spctIndices.begin(), itEnd = spctIndices.end(); it != itEnd; ++it ) {
234 fPut.push_back( std::pair<Char_t,UInt_t>(
's',idx) );
238 for( SelectedIndices::iterator it = varIndices.begin(), itEnd = varIndices.end(); it != itEnd; ++it ) {
240 fPut.push_back( std::pair<Char_t,UInt_t>(
'v',idx) );
242 for( SelectedIndices::iterator it = tgtIndices.begin(), itEnd = tgtIndices.end(); it != itEnd; ++it ) {
244 fPut.push_back( std::pair<Char_t,UInt_t>(
't',idx) );
246 for( SelectedIndices::iterator it = spctIndices.begin(), itEnd = spctIndices.end(); it != itEnd; ++it ) {
248 fPut.push_back( std::pair<Char_t,UInt_t>(
's',idx) );
258 Log() << kHEADER <<
"Transformation, Variable selection : " <<
Endl;
267 for( ; itGet != itGetEnd; ++itGet ) {
270 Char_t inputType = (*itGet).first;
271 Int_t inputIdx = (*itGet).second;
273 TString inputLabel =
"NOT FOND";
274 if( inputType ==
'v' ) {
276 inputTypeString =
"variable";
278 else if( inputType ==
't' ){
280 inputTypeString =
"target";
282 else if( inputType ==
's' ){
284 inputTypeString =
"spectator";
287 TString outputTypeString =
"?";
289 Char_t outputType = (*itPut).first;
290 Int_t outputIdx = (*itPut).second;
292 TString outputLabel =
"NOT FOUND";
293 if( outputType ==
'v' ) {
295 outputTypeString =
"variable";
297 else if( outputType ==
't' ){
299 outputTypeString =
"target";
301 else if( outputType ==
's' ){
303 outputTypeString =
"spectator";
305 Log() << kINFO <<
"Input : " << inputTypeString.
Data() <<
" '" << inputLabel.
Data() <<
"'" <<
" <---> " <<
"Output : " << outputTypeString.
Data() <<
" '" << outputLabel.
Data() <<
"'" <<
Endl;
306 Log() << kDEBUG <<
"\t(index=" << inputIdx <<
")." <<
"\t(index=" << outputIdx <<
")." <<
Endl;
325 if( backTransformation && !
fPut.empty() ){
326 itEntry =
fPut.begin();
327 itEntryEnd =
fPut.end();
328 input.reserve(
fPut.size());
331 itEntry =
fGet.begin();
332 itEntryEnd =
fGet.end();
333 input.reserve(
fGet.size() );
338 for( ; itEntry != itEntryEnd; ++itEntry ) {
340 Int_t idx = (*itEntry).second;
345 input.push_back( event->
GetValue(idx) );
348 input.push_back( event->
GetTarget(idx) );
354 Log() << kFATAL <<
"VariableTransformBase/GetInput : unknown type '" << type <<
"'." <<
Endl;
358 catch(std::out_of_range& ){
359 input.push_back(0.
f);
360 mask.push_back(
kTRUE);
361 hasMaskedEntries =
kTRUE;
364 return hasMaskedEntries;
372 std::vector<Float_t>::iterator itOutput = output.begin();
373 std::vector<Char_t>::iterator itMask = mask.begin();
376 event->CopyVarValues( *oldEvent );
383 if( backTransformation ||
fPut.empty() ){
384 itEntry =
fGet.begin();
385 itEntryEnd =
fGet.end();
388 itEntry =
fPut.begin();
389 itEntryEnd =
fPut.end();
393 for( ; itEntry != itEntryEnd; ++itEntry ) {
400 Int_t idx = (*itEntry).second;
401 if (itOutput == output.end())
Log() << kFATAL <<
"Read beyond array boundaries in VariableTransformBase::SetOutput"<<
Endl;
406 event->SetVal( idx, value );
409 event->SetTarget( idx, value );
412 event->SetSpectator( idx, value );
415 Log() << kFATAL <<
"VariableTransformBase/GetInput : unknown type '" << type <<
"'." <<
Endl;
417 if( !(*itMask) ) ++itOutput;
421 }
catch( std::exception& except ){
422 Log() << kFATAL <<
"VariableTransformBase/SetOutput : exception/" << except.what() <<
Endl;
440 nvars = ntgts = nspcts = 0;
456 Log() << kFATAL <<
"VariableTransformBase/GetVariableTypeNumbers : unknown type '" << type <<
"'." <<
Endl;
479 UInt_t nevts = events.size();
482 TVectorD x0( nvars+ntgts ); x0 *= 0;
483 TVectorD v0( nvars+ntgts ); v0 *= 0;
486 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
487 const Event* ev = events[ievt];
490 sumOfWeights += weight;
491 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
500 x0(ivar) += x*weight;
501 x2(ivar) += x*x*weight;
503 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
512 x0(nvars+itgt) += x*weight;
513 x2(nvars+itgt) += x*x*weight;
517 if (sumOfWeights <= 0) {
518 Log() << kFATAL <<
" the sum of event weights calculated for your input is == 0" 519 <<
" or exactly: " << sumOfWeights <<
" there is obviously some problem..."<<
Endl;
523 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
524 Double_t mean = x0(ivar)/sumOfWeights;
527 if (
x2(ivar)/sumOfWeights - mean*mean < 0) {
528 Log() << kFATAL <<
" the RMS of your input variable " << ivar
529 <<
" evaluates to an imaginary number: sqrt("<<
x2(ivar)/sumOfWeights - mean*mean
530 <<
") .. sometimes related to a problem with outliers and negative event weights" 535 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
536 Double_t mean = x0(nvars+itgt)/sumOfWeights;
537 Targets().at(itgt).SetMean( mean );
538 if (
x2(nvars+itgt)/sumOfWeights - mean*mean < 0) {
539 Log() << kFATAL <<
" the RMS of your target variable " << itgt
540 <<
" evaluates to an imaginary number: sqrt(" <<
x2(nvars+itgt)/sumOfWeights - mean*mean
541 <<
") .. sometimes related to a problem with outliers and negative event weights" 547 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
548 const Event* ev = events[ievt];
550 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
553 v0(ivar) += weight*(x-mean)*(x-mean);
555 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
558 v0(nvars+itgt) += weight*(x-mean)*(x-mean);
564 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
565 Double_t variance = v0(ivar)/sumOfWeights;
566 Variables().at(ivar).SetVariance( variance );
567 Log() << kINFO <<
"Variable " <<
Variables().at(ivar).GetExpression() <<
" variance = " << variance <<
Endl;
569 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
570 Double_t variance = v0(nvars+itgt)/sumOfWeights;
571 Targets().at(itgt).SetVariance( variance );
572 Log() << kINFO <<
"Target " <<
Targets().at(itgt).GetExpression() <<
" variance = " << variance <<
Endl;
575 Log() << kVERBOSE <<
"Set minNorm/maxNorm for variables to: " <<
Endl;
576 Log() << std::setprecision(3);
580 Log() << kVERBOSE <<
"Set minNorm/maxNorm for targets to: " <<
Endl;
581 Log() << std::setprecision(3);
583 Log() <<
" " <<
Targets().at(itgt).GetInternalName()
584 <<
"\t: [" <<
Targets().at(itgt).GetMin() <<
"\t, " <<
Targets().at(itgt).GetMax() <<
"\t] " <<
Endl;
585 Log() << std::setprecision(5);
595 std::vector<TString>* strVec =
new std::vector<TString>;
597 strVec->push_back(
Variables()[ivar].GetLabel() +
"_[transformed]");
614 if (x <
Targets().at(ivar-nvars).GetMin())
Targets().at(ivar-nvars).SetMin(x);
615 if (x >
Targets().at(ivar-nvars).GetMax())
Targets().at(ivar-nvars).SetMax(x);
633 UInt_t idx = (*itGet).second;
641 typeString =
"Variable";
646 typeString =
"Target";
651 typeString =
"Spectator";
656 Log() << kFATAL <<
"VariableTransformBase/AttachXMLTo unknown variable type '" << type <<
"'." <<
Endl;
671 UInt_t idx = (*itPut).second;
679 typeString =
"Variable";
684 typeString =
"Target";
689 typeString =
"Spectator";
694 Log() << kFATAL <<
"VariableTransformBase/AttachXMLTo unknown variable type '" << type <<
"'." <<
Endl;
735 if( typeString ==
"Variable" ){
736 for(
UInt_t ivar = 0; ivar < nvars; ++ivar ) {
739 fGet.push_back( std::pair<Char_t,UInt_t>(
'v',ivar) );
743 }
else if( typeString ==
"Target" ){
744 for(
UInt_t itgt = 0; itgt < ntgts; ++itgt ) {
747 fGet.push_back( std::pair<Char_t,UInt_t>(
't',itgt) );
751 }
else if( typeString ==
"Spectator" ){
752 for(
UInt_t ispct = 0; ispct < nspcts; ++ispct ) {
755 fGet.push_back( std::pair<Char_t,UInt_t>(
's',ispct) );
760 Log() << kFATAL <<
"VariableTransformationBase/ReadFromXML : unknown type '" << typeString <<
"'." <<
Endl;
765 assert( nInputs ==
fGet.size() );
783 if( typeString ==
"Variable" ){
784 for(
UInt_t ivar = 0; ivar < nvars; ++ivar ) {
787 fPut.push_back( std::pair<Char_t,UInt_t>(
'v',ivar) );
791 }
else if( typeString ==
"Target" ){
792 for(
UInt_t itgt = 0; itgt < ntgts; ++itgt ) {
795 fPut.push_back( std::pair<Char_t,UInt_t>(
't',itgt) );
799 }
else if( typeString ==
"Spectator" ){
800 for(
UInt_t ispct = 0; ispct < nspcts; ++ispct ) {
803 fPut.push_back( std::pair<Char_t,UInt_t>(
's',ispct) );
808 Log() << kFATAL <<
"VariableTransformationBase/ReadFromXML : unknown type '" << typeString <<
"'." <<
Endl;
813 assert( nOutputs ==
fPut.size() );
824 fout <<
" // define the indices of the variables which are transformed by this transformation" << std::endl;
825 fout <<
" static std::vector<int> indicesGet;" << std::endl;
826 fout <<
" static std::vector<int> indicesPut;" << std::endl << std::endl;
827 fout <<
" if ( indicesGet.empty() ) {" << std::endl;
828 fout <<
" indicesGet.reserve(fNvars);" << std::endl;
832 Int_t idx = (*itEntry).second;
836 fout <<
" indicesGet.push_back( " << idx <<
");" << std::endl;
839 Log() << kWARNING <<
"MakeClass doesn't work with transformation of targets. The results will be wrong!" <<
Endl;
842 Log() << kWARNING <<
"MakeClass doesn't work with transformation of spectators. The results will be wrong!" <<
Endl;
845 Log() << kFATAL <<
"VariableTransformBase/GetInput : unknown type '" << type <<
"'." <<
Endl;
848 fout <<
" }" << std::endl;
849 fout <<
" if ( indicesPut.empty() ) {" << std::endl;
850 fout <<
" indicesPut.reserve(fNvars);" << std::endl;
854 Int_t idx = (*itEntry).second;
858 fout <<
" indicesPut.push_back( " << idx <<
");" << std::endl;
861 Log() << kWARNING <<
"MakeClass doesn't work with transformation of targets. The results will be wrong!" <<
Endl;
864 Log() << kWARNING <<
"MakeClass doesn't work with transformation of spectators. The results will be wrong!" <<
Endl;
867 Log() << kFATAL <<
"VariableTransformBase/PutInput : unknown type '" << type <<
"'." <<
Endl;
871 fout <<
" }" << std::endl;
874 }
else if( part == 1){
UInt_t GetNVariables() const
#define TMVA_VERSION_CODE
MsgLogger & Endl(MsgLogger &ml)
Collectable string class.
const TString & GetLabel() const
const TString & GetExpression() const
static const double x2[5]
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
Class that contains all the data information.
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not...
Float_t GetTarget(UInt_t itgt) const
UInt_t GetNTargets() const
VariableInfo & GetTargetInfo(Int_t i)
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
UInt_t GetNSpectators(bool all=kTRUE) const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
TString & Remove(Ssiz_t pos)
VariableInfo & GetSpectatorInfo(Int_t i)
VariableInfo & GetVariableInfo(Int_t i)
ostringstream derivative to redirect and format output
Mother of all ROOT objects.
Int_t Atoi() const
Return integer value of string.
Double_t Sqrt(Double_t x)
Class for type info of MVA input variable.
Float_t GetSpectator(UInt_t ivar) const
return spectator content
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
const char * Data() const