Logo ROOT  
Reference Guide
VariableTransform.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Omar Zapata
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : VariableTransformBase *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * *
19 * Copyright (c) 2005: *
20 * CERN, Switzerland *
21 * MPI-K Heidelberg, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
32#include "TMVA/VariableInfo.h"
36
37#include "TMVA/Config.h"
38#include "TMVA/DataSetInfo.h"
39#include "TMVA/MsgLogger.h"
40#include "TMVA/Ranking.h"
41#include "TMVA/Tools.h"
42#include "TMVA/Types.h"
43#include "TMVA/VariableInfo.h"
44#include "TMVA/Version.h"
46#include "TMVA/MsgLogger.h"
47
48#include "TH1.h"
49#include "TH2.h"
50#include "THashTable.h"
51#include "TList.h"
52#include "TMath.h"
53#include "TProfile.h"
54#include "TVectorD.h"
55
56#include <algorithm>
57#include <cassert>
58#include <exception>
59#include <iomanip>
60#include <stdexcept>
61#include <set>
62
63////////////////////////////////////////////////////////////////////////////////
64/// create variable transformations
65
66namespace TMVA {
67void CreateVariableTransforms(const TString& trafoDefinitionIn,
68 TMVA::DataSetInfo& dataInfo,
69 TMVA::TransformationHandler& transformationHandler,
71{
72 TString trafoDefinition(trafoDefinitionIn);
73 if (trafoDefinition == "None") return; // no transformations
74
75 // workaround for transformations to complicated to be handled by makeclass
76 // count number of transformations with incomplete set of variables
77 TString trafoDefinitionCheck(trafoDefinitionIn);
78 int npartial = 0, ntrafo = 0;
79 for (Int_t pos = 0, siz = trafoDefinition.Sizeof(); pos < siz; ++pos) {
80 TString ch = trafoDefinition(pos,1);
81 if ( ch == "(" ) npartial++;
82 if ( ch == "+" || ch == ",") ntrafo++;
83 }
84 if (npartial>1) {
85 log << kWARNING
86 << "The use of multiple partial variable transformations during the "
87 "application phase can be properly invoked via the \"Reader\", but "
88 "it is not yet implemented in \"MakeClass\", the creation mechanism "
89 "for standalone C++ application classes. The standalone C++ class "
90 "produced by this training job is thus INCOMPLETE AND MUST NOT BE USED! "
91 "The transformation in question is: " << trafoDefinitionIn << Endl;
92 // ToDo make info and do not write the standalone class
93 //
94 // this does not work since this function is static
95 // fDisableWriting=true; // disable creation of stand-alone class
96 // ToDo we need to tell the transformation that it cannot write itself
97 }
98 // workaround end
99
100 Int_t parenthesisCount = 0;
101 for (Int_t position = 0, size = trafoDefinition.Sizeof(); position < size; ++position) {
102 TString ch = trafoDefinition(position,1);
103 if (ch == "(") ++parenthesisCount;
104 else if (ch == ")") --parenthesisCount;
105 else if (ch == "," && parenthesisCount == 0) trafoDefinition.Replace(position,1,'+');
106 }
107
108 TList* trList = gTools().ParseFormatLine( trafoDefinition, "+" );
109 TListIter trIt(trList);
110 while (TObjString* os = (TObjString*)trIt()) {
111 TString tdef = os->GetString();
112 Int_t idxCls = -1;
113
114 TString variables = "";
115 if (tdef.Contains("(")) { // contains selection of variables
116 Ssiz_t parStart = tdef.Index( "(" );
117 Ssiz_t parLen = tdef.Index( ")", parStart )-parStart+1;
118
119 variables = tdef(parStart,parLen);
120 tdef.Remove(parStart,parLen);
121 variables.Remove(parLen-1,1);
122 variables.Remove(0,1);
123 }
124
125 TList* trClsList = gTools().ParseFormatLine( tdef, "_" ); // split entry to get trf-name and class-name
126 TListIter trClsIt(trClsList);
127 if (trClsList->GetSize() < 1)
128 log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Incorrect transformation string provided." << Endl;
129 const TString& trName = ((TObjString*)trClsList->At(0))->GetString();
130
131 if (trClsList->GetEntries() > 1) {
132 TString trCls = "AllClasses";
133 ClassInfo *ci = NULL;
134 trCls = ((TObjString*)trClsList->At(1))->GetString();
135 if (trCls != "AllClasses") {
136 ci = dataInfo.GetClassInfo( trCls );
137 if (ci == NULL)
138 log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Class " << trCls << " not known for variable transformation "
139 << trName << ", please check." << Endl;
140 else
141 idxCls = ci->GetNumber();
142 }
143 }
144
145 VariableTransformBase* transformation = NULL;
146 if (trName == "I" || trName == "Ident" || trName == "Identity") {
147 if (variables.Length() == 0) variables = "_V_";
148 transformation = new VariableIdentityTransform(dataInfo);
149 }
150 else if (trName == "D" || trName == "Deco" || trName == "Decorrelate") {
151 if (variables.Length() == 0) variables = "_V_";
152 transformation = new VariableDecorrTransform(dataInfo);
153 }
154 else if (trName == "P" || trName == "PCA") {
155 if (variables.Length() == 0) variables = "_V_";
156 transformation = new VariablePCATransform(dataInfo);
157 }
158 else if (trName == "U" || trName == "Uniform") {
159 if (variables.Length() == 0) variables = "_V_,_T_";
160 transformation = new VariableGaussTransform(dataInfo, "Uniform" );
161 }
162 else if (trName == "G" || trName == "Gauss") {
163 if (variables.Length() == 0) variables = "_V_";
164 transformation = new VariableGaussTransform(dataInfo);
165 }
166 else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") {
167 if (variables.Length() == 0) variables = "_V_,_T_";
168 transformation = new VariableNormalizeTransform(dataInfo);
169 }
170 else
171 log << kFATAL << Form("Dataset[%s] : ",dataInfo.GetName())
172 << "<ProcessOptions> Variable transform '"
173 << trName << "' unknown." << Endl;
174
175
176 if (transformation) {
177 ClassInfo* clsInfo = dataInfo.GetClassInfo(idxCls);
178 if (clsInfo)
179 log << kHEADER << Form("[%s] : ",dataInfo.GetName())
180 << "Create Transformation \"" << trName << "\" with reference class "
181 << clsInfo->GetName() << "=("<< idxCls <<")" << Endl << Endl;
182 else
183 log << kHEADER << Form("[%s] : ",dataInfo.GetName())
184 << "Create Transformation \"" << trName << "\" with events from all classes."
185 << Endl << Endl;
186
187 transformation->SelectInput(variables);
188 transformationHandler.AddTransformation(transformation, idxCls);
189 }
190 }
191}
192
193}
int Int_t
Definition: RtypesCore.h:41
int Ssiz_t
Definition: RtypesCore.h:63
double log(double)
char * Form(const char *fmt,...)
virtual Int_t GetEntries() const
Definition: TCollection.h:177
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:182
Iterator of linked list.
Definition: TList.h:200
A doubly linked list.
Definition: TList.h:44
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:354
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t GetNumber() const
Definition: ClassInfo.h:65
Class that contains all the data information.
Definition: DataSetInfo.h:60
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:69
ClassInfo * GetClassInfo(Int_t clNum) const
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
Class that contains all the data information.
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)
Linear interpolation class.
Gaussian Transformation of input variables.
Linear interpolation class.
Linear interpolation class.
virtual void SelectInput(const TString &inputVariables, Bool_t putIntoVariables=kFALSE)
select the variables/targets/spectators which serve as input to the transformation
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Collectable string class.
Definition: TObjString.h:28
Basic string class.
Definition: TString.h:131
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:677
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1334
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
create variable transformations
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158