Logo ROOT  
Reference Guide
VariableTransform.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Omar Zapata
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : VariableTransformBase *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * *
19 * Copyright (c) 2005: *
20 * CERN, Switzerland *
21 * MPI-K Heidelberg, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
31#include "TMVA/VariableInfo.h"
35
36#include "TMVA/Config.h"
37#include "TMVA/DataSetInfo.h"
38#include "TMVA/MsgLogger.h"
39#include "TMVA/Ranking.h"
40#include "TMVA/Tools.h"
41#include "TMVA/Types.h"
42#include "TMVA/Version.h"
44
45#include "TH1.h"
46#include "TH2.h"
47#include "THashTable.h"
48#include "TList.h"
49#include "TMath.h"
50#include "TProfile.h"
51#include "TVectorD.h"
52#include "TObjString.h"
53
54#include <algorithm>
55#include <cassert>
56#include <exception>
57#include <iomanip>
58#include <stdexcept>
59#include <set>
60
61////////////////////////////////////////////////////////////////////////////////
62/// create variable transformations
63
64namespace TMVA {
65void CreateVariableTransforms(const TString& trafoDefinitionIn,
66 TMVA::DataSetInfo& dataInfo,
67 TMVA::TransformationHandler& transformationHandler,
69{
70 TString trafoDefinition(trafoDefinitionIn);
71 if (trafoDefinition == "None") return; // no transformations
72
73 // workaround for transformations to complicated to be handled by makeclass
74 // count number of transformations with incomplete set of variables
75 TString trafoDefinitionCheck(trafoDefinitionIn);
76 int npartial = 0, ntrafo = 0;
77 for (Int_t pos = 0, siz = trafoDefinition.Sizeof(); pos < siz; ++pos) {
78 TString ch = trafoDefinition(pos,1);
79 if ( ch == "(" ) npartial++;
80 if ( ch == "+" || ch == ",") ntrafo++;
81 }
82 if (npartial>1) {
83 log << kWARNING
84 << "The use of multiple partial variable transformations during the "
85 "application phase can be properly invoked via the \"Reader\", but "
86 "it is not yet implemented in \"MakeClass\", the creation mechanism "
87 "for standalone C++ application classes. The standalone C++ class "
88 "produced by this training job is thus INCOMPLETE AND MUST NOT BE USED! "
89 "The transformation in question is: " << trafoDefinitionIn << Endl;
90 // ToDo make info and do not write the standalone class
91 //
92 // this does not work since this function is static
93 // fDisableWriting=true; // disable creation of stand-alone class
94 // ToDo we need to tell the transformation that it cannot write itself
95 }
96 // workaround end
97
98 Int_t parenthesisCount = 0;
99 for (Int_t position = 0, size = trafoDefinition.Sizeof(); position < size; ++position) {
100 TString ch = trafoDefinition(position,1);
101 if (ch == "(") ++parenthesisCount;
102 else if (ch == ")") --parenthesisCount;
103 else if (ch == "," && parenthesisCount == 0) trafoDefinition.Replace(position,1,'+');
104 }
105
106 TList* trList = gTools().ParseFormatLine( trafoDefinition, "+" );
107 TListIter trIt(trList);
108 while (TObjString* os = (TObjString*)trIt()) {
109 TString tdef = os->GetString();
110 Int_t idxCls = -1;
111
112 TString variables = "";
113 if (tdef.Contains("(")) { // contains selection of variables
114 Ssiz_t parStart = tdef.Index( "(" );
115 Ssiz_t parLen = tdef.Index( ")", parStart )-parStart+1;
116
117 variables = tdef(parStart,parLen);
118 tdef.Remove(parStart,parLen);
119 variables.Remove(parLen-1,1);
120 variables.Remove(0,1);
121 }
122
123 TList* trClsList = gTools().ParseFormatLine( tdef, "_" ); // split entry to get trf-name and class-name
124 TListIter trClsIt(trClsList);
125 if (trClsList->GetSize() < 1)
126 log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Incorrect transformation string provided." << Endl;
127 const TString& trName = ((TObjString*)trClsList->At(0))->GetString();
128
129 if (trClsList->GetEntries() > 1) {
130 TString trCls = "AllClasses";
131 ClassInfo *ci = NULL;
132 trCls = ((TObjString*)trClsList->At(1))->GetString();
133 if (trCls != "AllClasses") {
134 ci = dataInfo.GetClassInfo( trCls );
135 if (ci == NULL)
136 log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Class " << trCls << " not known for variable transformation "
137 << trName << ", please check." << Endl;
138 else
139 idxCls = ci->GetNumber();
140 }
141 }
142
143 VariableTransformBase* transformation = NULL;
144 if (trName == "I" || trName == "Ident" || trName == "Identity") {
145 if (variables.Length() == 0) variables = "_V_";
146 transformation = new VariableIdentityTransform(dataInfo);
147 }
148 else if (trName == "D" || trName == "Deco" || trName == "Decorrelate") {
149 if (variables.Length() == 0) variables = "_V_";
150 transformation = new VariableDecorrTransform(dataInfo);
151 }
152 else if (trName == "P" || trName == "PCA") {
153 if (variables.Length() == 0) variables = "_V_";
154 transformation = new VariablePCATransform(dataInfo);
155 }
156 else if (trName == "U" || trName == "Uniform") {
157 if (variables.Length() == 0) variables = "_V_,_T_";
158 transformation = new VariableGaussTransform(dataInfo, "Uniform" );
159 }
160 else if (trName == "G" || trName == "Gauss") {
161 if (variables.Length() == 0) variables = "_V_";
162 transformation = new VariableGaussTransform(dataInfo);
163 }
164 else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") {
165 if (variables.Length() == 0) variables = "_V_,_T_";
166 transformation = new VariableNormalizeTransform(dataInfo);
167 }
168 else
169 log << kFATAL << Form("Dataset[%s] : ",dataInfo.GetName())
170 << "<ProcessOptions> Variable transform '"
171 << trName << "' unknown." << Endl;
172
173
174 if (transformation) {
175 ClassInfo* clsInfo = dataInfo.GetClassInfo(idxCls);
176 if (clsInfo)
177 log << kHEADER << Form("[%s] : ",dataInfo.GetName())
178 << "Create Transformation \"" << trName << "\" with reference class "
179 << clsInfo->GetName() << "=("<< idxCls <<")" << Endl << Endl;
180 else
181 log << kHEADER << Form("[%s] : ",dataInfo.GetName())
182 << "Create Transformation \"" << trName << "\" with events from all classes."
183 << Endl << Endl;
184
185 transformation->SelectInput(variables);
186 transformationHandler.AddTransformation(transformation, idxCls);
187 }
188 }
189}
190
191}
double log(double)
char * Form(const char *fmt,...)
virtual Int_t GetEntries() const
Definition: TCollection.h:177
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:182
Iterator of linked list.
Definition: TList.h:200
A doubly linked list.
Definition: TList.h:44
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:356
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t GetNumber() const
Definition: ClassInfo.h:65
Class that contains all the data information.
Definition: DataSetInfo.h:60
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:69
ClassInfo * GetClassInfo(Int_t clNum) const
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:412
Class that contains all the data information.
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)
Linear interpolation class.
Gaussian Transformation of input variables.
Linear interpolation class.
Linear interpolation class.
virtual void SelectInput(const TString &inputVariables, Bool_t putIntoVariables=kFALSE)
select the variables/targets/spectators which serve as input to the transformation
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Collectable string class.
Definition: TObjString.h:28
Basic string class.
Definition: TString.h:131
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:677
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1334
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
create variable transformations
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158