Logo ROOT  
Reference Guide
VariableTransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Omar Zapata
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariableTransformBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * MPI-K Heidelberg, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
31 #include "TMVA/VariableInfo.h"
35 
36 #include "TMVA/Config.h"
37 #include "TMVA/DataSetInfo.h"
38 #include "TMVA/MsgLogger.h"
39 #include "TMVA/Ranking.h"
40 #include "TMVA/Tools.h"
41 #include "TMVA/Types.h"
42 #include "TMVA/Version.h"
44 
45 #include "THashTable.h"
46 #include "TList.h"
47 #include "TObjString.h"
48 
49 #include <algorithm>
50 #include <cassert>
51 #include <exception>
52 #include <stdexcept>
53 #include <set>
54 
55 ////////////////////////////////////////////////////////////////////////////////
56 /// create variable transformations
57 
58 namespace TMVA {
59 void CreateVariableTransforms(const TString& trafoDefinitionIn,
60  TMVA::DataSetInfo& dataInfo,
61  TMVA::TransformationHandler& transformationHandler,
63 {
64  TString trafoDefinition(trafoDefinitionIn);
65  if (trafoDefinition == "None") return; // no transformations
66 
67  // workaround for transformations to complicated to be handled by makeclass
68  // count number of transformations with incomplete set of variables
69  TString trafoDefinitionCheck(trafoDefinitionIn);
70  int npartial = 0, ntrafo = 0;
71  for (Int_t pos = 0, siz = trafoDefinition.Sizeof(); pos < siz; ++pos) {
72  TString ch = trafoDefinition(pos,1);
73  if ( ch == "(" ) npartial++;
74  if ( ch == "+" || ch == ",") ntrafo++;
75  }
76  if (npartial>1) {
77  log << kWARNING
78  << "The use of multiple partial variable transformations during the "
79  "application phase can be properly invoked via the \"Reader\", but "
80  "it is not yet implemented in \"MakeClass\", the creation mechanism "
81  "for standalone C++ application classes. The standalone C++ class "
82  "produced by this training job is thus INCOMPLETE AND MUST NOT BE USED! "
83  "The transformation in question is: " << trafoDefinitionIn << Endl;
84  // ToDo make info and do not write the standalone class
85  //
86  // this does not work since this function is static
87  // fDisableWriting=true; // disable creation of stand-alone class
88  // ToDo we need to tell the transformation that it cannot write itself
89  }
90  // workaround end
91 
92  Int_t parenthesisCount = 0;
93  for (Int_t position = 0, size = trafoDefinition.Sizeof(); position < size; ++position) {
94  TString ch = trafoDefinition(position,1);
95  if (ch == "(") ++parenthesisCount;
96  else if (ch == ")") --parenthesisCount;
97  else if (ch == "," && parenthesisCount == 0) trafoDefinition.Replace(position,1,'+');
98  }
99 
100  TList* trList = gTools().ParseFormatLine( trafoDefinition, "+" );
101  TListIter trIt(trList);
102  while (TObjString* os = (TObjString*)trIt()) {
103  TString tdef = os->GetString();
104  Int_t idxCls = -1;
105 
106  TString variables = "";
107  if (tdef.Contains("(")) { // contains selection of variables
108  Ssiz_t parStart = tdef.Index( "(" );
109  Ssiz_t parLen = tdef.Index( ")", parStart )-parStart+1;
110 
111  variables = tdef(parStart,parLen);
112  tdef.Remove(parStart,parLen);
113  variables.Remove(parLen-1,1);
114  variables.Remove(0,1);
115  }
116 
117  TList* trClsList = gTools().ParseFormatLine( tdef, "_" ); // split entry to get trf-name and class-name
118  TListIter trClsIt(trClsList);
119  if (trClsList->GetSize() < 1)
120  log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Incorrect transformation string provided." << Endl;
121  const TString& trName = ((TObjString*)trClsList->At(0))->GetString();
122 
123  if (trClsList->GetEntries() > 1) {
124  TString trCls = "AllClasses";
125  ClassInfo *ci = NULL;
126  trCls = ((TObjString*)trClsList->At(1))->GetString();
127  if (trCls != "AllClasses") {
128  ci = dataInfo.GetClassInfo( trCls );
129  if (ci == NULL)
130  log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Class " << trCls << " not known for variable transformation "
131  << trName << ", please check." << Endl;
132  else
133  idxCls = ci->GetNumber();
134  }
135  }
136 
137  VariableTransformBase* transformation = NULL;
138  if (trName == "I" || trName == "Ident" || trName == "Identity") {
139  if (variables.Length() == 0) variables = "_V_";
140  transformation = new VariableIdentityTransform(dataInfo);
141  }
142  else if (trName == "D" || trName == "Deco" || trName == "Decorrelate") {
143  if (variables.Length() == 0) variables = "_V_";
144  transformation = new VariableDecorrTransform(dataInfo);
145  }
146  else if (trName == "P" || trName == "PCA") {
147  if (variables.Length() == 0) variables = "_V_";
148  transformation = new VariablePCATransform(dataInfo);
149  }
150  else if (trName == "U" || trName == "Uniform") {
151  if (variables.Length() == 0) variables = "_V_,_T_";
152  transformation = new VariableGaussTransform(dataInfo, "Uniform" );
153  }
154  else if (trName == "G" || trName == "Gauss") {
155  if (variables.Length() == 0) variables = "_V_";
156  transformation = new VariableGaussTransform(dataInfo);
157  }
158  else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") {
159  if (variables.Length() == 0) variables = "_V_,_T_";
160  transformation = new VariableNormalizeTransform(dataInfo);
161  }
162  else
163  log << kFATAL << Form("Dataset[%s] : ",dataInfo.GetName())
164  << "<ProcessOptions> Variable transform '"
165  << trName << "' unknown." << Endl;
166 
167 
168  if (transformation) {
169  ClassInfo* clsInfo = dataInfo.GetClassInfo(idxCls);
170  if (clsInfo)
171  log << kHEADER << Form("[%s] : ",dataInfo.GetName())
172  << "Create Transformation \"" << trName << "\" with reference class "
173  << clsInfo->GetName() << "=("<< idxCls <<")" << Endl << Endl;
174  else
175  log << kHEADER << Form("[%s] : ",dataInfo.GetName())
176  << "Create Transformation \"" << trName << "\" with events from all classes."
177  << Endl << Endl;
178 
179  transformation->SelectInput(variables);
180  transformationHandler.AddTransformation(transformation, idxCls);
181  }
182  }
183 }
184 
185 }
VariablePCATransform.h
TCollection::GetEntries
virtual Int_t GetEntries() const
Definition: TCollection.h:177
TMVA::ClassInfo
Class that contains all the information of a class.
Definition: ClassInfo.h:49
TMVA::VariableIdentityTransform
Linear interpolation class.
Definition: VariableIdentityTransform.h:45
DataSetInfo.h
Form
char * Form(const char *fmt,...)
TObjString.h
TMVA::VariableNormalizeTransform
Linear interpolation class.
Definition: VariableNormalizeTransform.h:48
TString::Replace
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:682
Ranking.h
TMVA::TransformationHandler
Class that contains all the data information.
Definition: TransformationHandler.h:56
TMVA::VariableGaussTransform
Gaussian Transformation of input variables.
Definition: VariableGaussTransform.h:72
log
double log(double)
VariableDecorrTransform.h
VariableInfo.h
VariableIdentityTransform.h
TString::Contains
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:624
TList.h
TMVA::VariableTransformBase
Linear interpolation class.
Definition: VariableTransformBase.h:54
VariableTransformBase.h
TString
Basic string class.
Definition: TString.h:136
TListIter
Iterator of linked list.
Definition: TList.h:200
Version.h
TMVA::VariablePCATransform
Linear interpolation class.
Definition: VariablePCATransform.h:48
TObjString
Collectable string class.
Definition: TObjString.h:28
TList::At
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:357
TMVA::Tools::ParseFormatLine
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
TMVA::DataSetInfo
Class that contains all the data information.
Definition: DataSetInfo.h:62
MsgLogger.h
TransformationHandler.h
TMVA::variables
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
TString::Remove
TString & Remove(Ssiz_t pos)
Definition: TString.h:673
TMVA::DataSetInfo::GetClassInfo
ClassInfo * GetClassInfo(Int_t clNum) const
Definition: DataSetInfo.cxx:146
VariableGaussTransform.h
TMVA::VariableDecorrTransform
Linear interpolation class.
Definition: VariableDecorrTransform.h:49
VariableNormalizeTransform.h
TMVA::ClassInfo::GetNumber
UInt_t GetNumber() const
Definition: ClassInfo.h:65
Types.h
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Config.h
TString::Index
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:639
TString::Sizeof
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1334
TMVA::MsgLogger
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
TCollection::GetSize
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:182
TMVA::VariableTransformBase::SelectInput
virtual void SelectInput(const TString &inputVariables, Bool_t putIntoVariables=kFALSE)
select the variables/targets/spectators which serve as input to the transformation
Definition: VariableTransformBase.cxx:110
TMVA::DataSetInfo::GetName
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:71
TMVA::TransformationHandler::AddTransformation
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)
Definition: TransformationHandler.cxx:105
Tools.h
TNamed::GetName
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
TMVA::gTools
Tools & gTools()
TList
A doubly linked list.
Definition: TList.h:44
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
int
TMVA::CreateVariableTransforms
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
Definition: VariableTransform.cxx:59
THashTable.h