Logo ROOT   6.14/05
Reference Guide
VariableTransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Omar Zapata
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariableTransformBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * MPI-K Heidelberg, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
32 #include "TMVA/VariableInfo.h"
36 
37 #include "TMVA/Config.h"
38 #include "TMVA/DataSetInfo.h"
39 #include "TMVA/MsgLogger.h"
40 #include "TMVA/Ranking.h"
41 #include "TMVA/Tools.h"
42 #include "TMVA/Types.h"
43 #include "TMVA/VariableInfo.h"
44 #include "TMVA/Version.h"
46 #include "TMVA/MsgLogger.h"
47 
48 #include "TH1.h"
49 #include "TH2.h"
50 #include "THashTable.h"
51 #include "TList.h"
52 #include "TMath.h"
53 #include "TProfile.h"
54 #include "TVectorD.h"
55 
56 #include <algorithm>
57 #include <cassert>
58 #include <exception>
59 #include <iomanip>
60 #include <stdexcept>
61 #include <set>
62 
63 ////////////////////////////////////////////////////////////////////////////////
64 /// create variable transformations
65 
66 namespace TMVA {
67 void CreateVariableTransforms(const TString& trafoDefinitionIn,
68  TMVA::DataSetInfo& dataInfo,
69  TMVA::TransformationHandler& transformationHandler,
71 {
72  TString trafoDefinition(trafoDefinitionIn);
73  if (trafoDefinition == "None") return; // no transformations
74 
75  // workaround for transformations to complicated to be handled by makeclass
76  // count number of transformations with incomplete set of variables
77  TString trafoDefinitionCheck(trafoDefinitionIn);
78  int npartial = 0, ntrafo = 0;
79  for (Int_t pos = 0, siz = trafoDefinition.Sizeof(); pos < siz; ++pos) {
80  TString ch = trafoDefinition(pos,1);
81  if ( ch == "(" ) npartial++;
82  if ( ch == "+" || ch == ",") ntrafo++;
83  }
84  if (npartial>1) {
85  log << kWARNING
86  << "The use of multiple partial variable transformations during the "
87  "application phase can be properly invoked via the \"Reader\", but "
88  "it is not yet implemented in \"MakeClass\", the creation mechanism "
89  "for standalone C++ application classes. The standalone C++ class "
90  "produced by this training job is thus INCOMPLETE AND MUST NOT BE USED! "
91  "The transformation in question is: " << trafoDefinitionIn << Endl;
92  // ToDo make info and do not write the standalone class
93  //
94  // this does not work since this function is static
95  // fDisableWriting=true; // disable creation of stand-alone class
96  // ToDo we need to tell the transformation that it cannot write itself
97  }
98  // workaround end
99 
100  Int_t parenthesisCount = 0;
101  for (Int_t position = 0, size = trafoDefinition.Sizeof(); position < size; ++position) {
102  TString ch = trafoDefinition(position,1);
103  if (ch == "(") ++parenthesisCount;
104  else if (ch == ")") --parenthesisCount;
105  else if (ch == "," && parenthesisCount == 0) trafoDefinition.Replace(position,1,'+');
106  }
107 
108  TList* trList = gTools().ParseFormatLine( trafoDefinition, "+" );
109  TListIter trIt(trList);
110  while (TObjString* os = (TObjString*)trIt()) {
111  TString tdef = os->GetString();
112  Int_t idxCls = -1;
113 
114  TString variables = "";
115  if (tdef.Contains("(")) { // contains selection of variables
116  Ssiz_t parStart = tdef.Index( "(" );
117  Ssiz_t parLen = tdef.Index( ")", parStart )-parStart+1;
118 
119  variables = tdef(parStart,parLen);
120  tdef.Remove(parStart,parLen);
121  variables.Remove(parLen-1,1);
122  variables.Remove(0,1);
123  }
124 
125  TList* trClsList = gTools().ParseFormatLine( tdef, "_" ); // split entry to get trf-name and class-name
126  TListIter trClsIt(trClsList);
127  if (trClsList->GetSize() < 1)
128  log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Incorrect transformation string provided." << Endl;
129  const TString& trName = ((TObjString*)trClsList->At(0))->GetString();
130 
131  if (trClsList->GetEntries() > 1) {
132  TString trCls = "AllClasses";
133  ClassInfo *ci = NULL;
134  trCls = ((TObjString*)trClsList->At(1))->GetString();
135  if (trCls != "AllClasses") {
136  ci = dataInfo.GetClassInfo( trCls );
137  if (ci == NULL)
138  log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Class " << trCls << " not known for variable transformation "
139  << trName << ", please check." << Endl;
140  else
141  idxCls = ci->GetNumber();
142  }
143  }
144 
145  VariableTransformBase* transformation = NULL;
146  if (trName == "I" || trName == "Ident" || trName == "Identity") {
147  if (variables.Length() == 0) variables = "_V_";
148  transformation = new VariableIdentityTransform(dataInfo);
149  }
150  else if (trName == "D" || trName == "Deco" || trName == "Decorrelate") {
151  if (variables.Length() == 0) variables = "_V_";
152  transformation = new VariableDecorrTransform(dataInfo);
153  }
154  else if (trName == "P" || trName == "PCA") {
155  if (variables.Length() == 0) variables = "_V_";
156  transformation = new VariablePCATransform(dataInfo);
157  }
158  else if (trName == "U" || trName == "Uniform") {
159  if (variables.Length() == 0) variables = "_V_,_T_";
160  transformation = new VariableGaussTransform(dataInfo, "Uniform" );
161  }
162  else if (trName == "G" || trName == "Gauss") {
163  if (variables.Length() == 0) variables = "_V_";
164  transformation = new VariableGaussTransform(dataInfo);
165  }
166  else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") {
167  if (variables.Length() == 0) variables = "_V_,_T_";
168  transformation = new VariableNormalizeTransform(dataInfo);
169  }
170  else
171  log << kFATAL << Form("Dataset[%s] : ",dataInfo.GetName())
172  << "<ProcessOptions> Variable transform '"
173  << trName << "' unknown." << Endl;
174 
175 
176  if (transformation) {
177  ClassInfo* clsInfo = dataInfo.GetClassInfo(idxCls);
178  if (clsInfo)
179  log << kHEADER << Form("[%s] : ",dataInfo.GetName())
180  << "Create Transformation \"" << trName << "\" with reference class "
181  << clsInfo->GetName() << "=("<< idxCls <<")" << Endl << Endl;
182  else
183  log << kHEADER << Form("[%s] : ",dataInfo.GetName())
184  << "Create Transformation \"" << trName << "\" with events from all classes."
185  << Endl << Endl;
186 
187  transformation->SelectInput(variables);
188  transformationHandler.AddTransformation(transformation, idxCls);
189  }
190  }
191 }
192 
193 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Collectable string class.
Definition: TObjString.h:28
virtual Int_t GetEntries() const
Definition: TCollection.h:177
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
Basic string class.
Definition: TString.h:131
int Int_t
Definition: RtypesCore.h:41
Iterator of linked list.
Definition: TList.h:197
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:677
Class that contains all the information of a class.
Definition: ClassInfo.h:49
Class that contains all the data information.
Definition: DataSetInfo.h:60
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
A doubly linked list.
Definition: TList.h:44
Linear interpolation class.
ClassInfo * GetClassInfo(Int_t clNum) const
char * Form(const char *fmt,...)
Ssiz_t Length() const
Definition: TString.h:405
Linear interpolation class.
Class that contains all the data information.
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:354
Tools & gTools()
Gaussian Transformation of input variables.
Linear interpolation class.
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
int Ssiz_t
Definition: RtypesCore.h:63
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1309
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
UInt_t GetNumber() const
Definition: ClassInfo.h:65
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Abstract ClassifierFactory template that handles arbitrary types.
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
virtual void SelectInput(const TString &inputVariables, Bool_t putIntoVariables=kFALSE)
select the variables/targets/spectators which serve as input to the transformation ...
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:182
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
double log(double)
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)