Logo ROOT   6.07/09
Reference Guide
VariableTransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Omar Zapata
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariableTransformBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * MPI-K Heidelberg, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
32 #include "TMVA/VariableInfo.h"
36 
37 #include "TMVA/Config.h"
38 #include "TMVA/DataSetInfo.h"
39 #include "TMVA/MsgLogger.h"
40 #include "TMVA/Ranking.h"
41 #include "TMVA/Tools.h"
42 #include "TMVA/Types.h"
43 #include "TMVA/VariableInfo.h"
44 #include "TMVA/Version.h"
46 #include "TMVA/MsgLogger.h"
47 
48 #include "TH1.h"
49 #include "TH2.h"
50 #include "THashTable.h"
51 #include "TList.h"
52 #include "TMath.h"
53 #include "TProfile.h"
54 #include "TVectorD.h"
55 
56 #include <algorithm>
57 #include <cassert>
58 #include <exception>
59 #include <iomanip>
60 #include <stdexcept>
61 #include <set>
62 
63 ////////////////////////////////////////////////////////////////////////////////
64 /// create variable transformations
65 
66 namespace TMVA {
67 void CreateVariableTransforms( const TString& trafoDefinitionIn,
68  TMVA::DataSetInfo& dataInfo,
69  TMVA::TransformationHandler& transformationHandler,
71 {
72  TString trafoDefinition(trafoDefinitionIn);
73  if (trafoDefinition == "None") return; // no transformations
74 
75  // workaround for transformations to complicated to be handled by makeclass
76  // count number of transformations with incomplete set of variables
77  TString trafoDefinitionCheck(trafoDefinitionIn);
78  int npartial = 0, ntrafo=0;
79  for (Int_t pos = 0, siz = trafoDefinition.Sizeof(); pos < siz; ++pos) {
80  TString ch = trafoDefinition(pos,1);
81  if ( ch == "(" ) npartial++;
82  if ( ch == "+" || ch == ",") ntrafo++;
83  }
84  if (npartial>1) {
85  log << kWARNING << "The use of multiple partial variable transformations during the application phase can be properly invoked via the \"Reader\", but it is not yet implemented in \"MakeClass\", the creation mechanism for standalone C++ application classes. The standalone C++ class produced by this training job is thus INCOMPLETE AND MUST NOT BE USED! The transformation in question is: " << trafoDefinitionIn << Endl; // ToDo make info and do not write the standalone class
86  //
87  // this does not work since this function is static
88  // fDisableWriting=true; // disable creation of stand-alone class
89  // ToDo we need to tell the transformation that it cannot write itself
90  }
91  // workaround end
92 
93  Int_t parenthesisCount = 0;
94  for (Int_t position = 0, size = trafoDefinition.Sizeof(); position < size; ++position) {
95  TString ch = trafoDefinition(position,1);
96  if (ch == "(") ++parenthesisCount;
97  else if (ch == ")") --parenthesisCount;
98  else if (ch == "," && parenthesisCount == 0) trafoDefinition.Replace(position,1,'+');
99  }
100 
101  TList* trList = gTools().ParseFormatLine( trafoDefinition, "+" );
102  TListIter trIt(trList);
103  while (TObjString* os = (TObjString*)trIt()) {
104  TString tdef = os->GetString();
105  Int_t idxCls = -1;
106 
107  TString variables = "";
108  if (tdef.Contains("(")) { // contains selection of variables
109  Ssiz_t parStart = tdef.Index( "(" );
110  Ssiz_t parLen = tdef.Index( ")", parStart )-parStart+1;
111 
112  variables = tdef(parStart,parLen);
113  tdef.Remove(parStart,parLen);
114  variables.Remove(parLen-1,1);
115  variables.Remove(0,1);
116  }
117 
118  TList* trClsList = gTools().ParseFormatLine( tdef, "_" ); // split entry to get trf-name and class-name
119  TListIter trClsIt(trClsList);
120  if (trClsList->GetSize() < 1) log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Incorrect transformation string provided." << Endl;
121  const TString& trName = ((TObjString*)trClsList->At(0))->GetString();
122 
123  if (trClsList->GetEntries() > 1) {
124  TString trCls = "AllClasses";
125  ClassInfo *ci = NULL;
126  trCls = ((TObjString*)trClsList->At(1))->GetString();
127  if (trCls != "AllClasses") {
128  ci = dataInfo.GetClassInfo( trCls );
129  if (ci == NULL)
130  log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Class " << trCls << " not known for variable transformation "
131  << trName << ", please check." << Endl;
132  else
133  idxCls = ci->GetNumber();
134  }
135  }
136 
137  VariableTransformBase* transformation = NULL;
138  if (trName == "I" || trName == "Ident" || trName == "Identity") {
139  if (variables.Length() == 0) variables = "_V_";
140  transformation = new VariableIdentityTransform( dataInfo);
141  }
142  else if (trName == "D" || trName == "Deco" || trName == "Decorrelate") {
143  if (variables.Length() == 0) variables = "_V_";
144  transformation = new VariableDecorrTransform( dataInfo);
145  }
146  else if (trName == "P" || trName == "PCA") {
147  if (variables.Length() == 0) variables = "_V_";
148  transformation = new VariablePCATransform ( dataInfo);
149  }
150  else if (trName == "U" || trName == "Uniform") {
151  if (variables.Length() == 0) variables = "_V_,_T_";
152  transformation = new VariableGaussTransform ( dataInfo, "Uniform" );
153  }
154  else if (trName == "G" || trName == "Gauss") {
155  if (variables.Length() == 0) variables = "_V_";
156  transformation = new VariableGaussTransform ( dataInfo);
157  }
158  else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") {
159  if (variables.Length() == 0) variables = "_V_,_T_";
160  transformation = new VariableNormalizeTransform( dataInfo);
161  }
162  else log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "<ProcessOptions> Variable transform '"
163  << trName << "' unknown." << Endl;
164 
165 
166  if (transformation) {
167  ClassInfo* clsInfo = dataInfo.GetClassInfo(idxCls);
168  if (clsInfo )
169  log << kHEADER <<Form("[%s] : ",dataInfo.GetName())
170  << "Create Transformation \"" << trName << "\" with reference class "
171  << clsInfo->GetName() << "=("<< idxCls <<")"<<Endl << Endl;
172  else
173  log << kHEADER <<Form("[%s] : ",dataInfo.GetName())
174  << "Create Transformation \"" << trName << "\" with events from all classes." << Endl << Endl;
175 
176  transformation->SelectInput( variables );
177  transformationHandler.AddTransformation(transformation, idxCls);
178  }
179  }
180  return;
181 }
182 
183 }
virtual Int_t GetEntries() const
Definition: TCollection.h:92
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
Ssiz_t Length() const
Definition: TString.h:390
Collectable string class.
Definition: TObjString.h:32
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:311
Iterator of linked list.
Definition: TList.h:187
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:625
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:85
Tools & gTools()
Definition: Tools.cxx:79
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
A doubly linked list.
Definition: TList.h:47
ClassInfo * GetClassInfo(Int_t clNum) const
char * Form(const char *fmt,...)
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
TString & Remove(Ssiz_t pos)
Definition: TString.h:616
int Ssiz_t
Definition: RtypesCore.h:63
virtual Int_t GetSize() const
Definition: TCollection.h:95
Abstract ClassifierFactory template that handles arbitrary types.
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
#define NULL
Definition: Rtypes.h:82
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1298
virtual void SelectInput(const TString &inputVariables, Bool_t putIntoVariables=kFALSE)
select the variables/targets/spectators which serve as input to the transformation ...
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:582
UInt_t GetNumber() const
Definition: ClassInfo.h:73
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
Definition: variables.cxx:10
double log(double)
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)