Logo ROOT  
Reference Guide
VariableGaussTransform.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : VariableGaussTransform *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * *
20 * Copyright (c) 2005-2011: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * U. of Bonn, Germany *
24 * *
25 * Redistribution and use in source and binary forms, with or without *
26 * modification, are permitted according to the terms listed in LICENSE *
27 * (http://tmva.sourceforge.net/LICENSE) *
28 **********************************************************************************/
29
30/*! \class TMVA::VariableGaussTransform
31\ingroup TMVA
32Gaussian Transformation of input variables.
33*/
34
36
37#include "TMVA/DataSetInfo.h"
38#include "TMVA/MsgLogger.h"
39#include "TMVA/PDF.h"
40#include "TMVA/Tools.h"
41#include "TMVA/Types.h"
42#include "TMVA/Version.h"
43
44#include "TCanvas.h"
45#include "TGraph.h"
46#include "TH1F.h"
47#include "TMath.h"
48#include "TVectorF.h"
49#include "TVectorD.h"
50
51#include <exception>
52#include <iostream>
53#include <iomanip>
54#include <list>
55#include <limits>
56#include <stdexcept>
57
59
60////////////////////////////////////////////////////////////////////////////////
61/// constructor
62/// can only be applied one after the other when they are created. But in order to
63/// determine the Gauss transformation
64
66: VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
67 fFlatNotGauss(kFALSE),
68 fPdfMinSmooth(0),
69 fPdfMaxSmooth(0),
70 fElementsperbin(0)
71{
72 if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
73 SetName("Uniform");
74 }
75}
76
77////////////////////////////////////////////////////////////////////////////////
78/// destructor
79
81{
82 CleanUpCumulativeArrays();
83}
84
85////////////////////////////////////////////////////////////////////////////////
86
88{
89}
90
91////////////////////////////////////////////////////////////////////////////////
92/// calculate the cumulative distributions
93
95{
96 Initialize();
97
98 if (!IsEnabled() || IsCreated()) return kTRUE;
99
100 Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
101
102 UInt_t inputSize = fGet.size();
103 SetNVariables(inputSize);
104
105 if (inputSize > 200) {
106 Log() << kWARNING << "----------------------------------------------------------------------------"
107 << Endl;
108 Log() << kWARNING
109 << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
110 Log() << kWARNING << "----------------------------------------------------------------------------"
111 << Endl;
112 // return kFALSE;
113 }
114
115 GetCumulativeDist( events );
116
117 SetCreated( kTRUE );
118
119 return kTRUE;
120}
121
122////////////////////////////////////////////////////////////////////////////////
123/// apply the Gauss transformation
124
126{
127 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
128 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
129 //EVT if (cls <0 || cls > GetNClasses() ) {
130 //EVT cls = GetNClasses();
131 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
132 //EVT}
133 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
134 //EVT workaround end
135
136 // get the variable vector of the current event
137 UInt_t inputSize = fGet.size();
138
139 std::vector<Float_t> input(0);
140 std::vector<Float_t> output(0);
141
142 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
143 GetInput( ev, input, mask );
144
145 std::vector<Char_t>::iterator itMask = mask.begin();
146
147 // TVectorD vec( inputSize );
148 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
149 Double_t cumulant;
150 //transformation
151 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
152
153 if ( (*itMask) ){
154 ++itMask;
155 continue;
156 }
157
158 if (0 != fCumulativePDF[ivar][cls]) {
159 // first make it flat
160 if(fTMVAVersion>TMVA_VERSION(3,9,7))
161 cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
162 else
163 cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
164 cumulant = TMath::Min(cumulant,1.-10e-10);
165 cumulant = TMath::Max(cumulant,0.+10e-10);
166
167 if (fFlatNotGauss)
168 output.push_back( cumulant );
169 else {
170 // sanity correction for out-of-range values
171 Double_t maxErfInvArgRange = 0.99999999;
172 Double_t arg = 2.0*cumulant - 1.0;
173 arg = TMath::Min(+maxErfInvArgRange,arg);
174 arg = TMath::Max(-maxErfInvArgRange,arg);
175
176 output.push_back( 1.414213562*TMath::ErfInverse(arg) );
177 }
178 }
179 }
180
181 if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
182 if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
183 fTransformedEvent = new Event();
184 }
185
186 SetOutput( fTransformedEvent, output, mask, ev );
187
188 return fTransformedEvent;
189}
190
191////////////////////////////////////////////////////////////////////////////////
192/// apply the inverse Gauss or inverse uniform transformation
193
195{
196 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
197 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
198 //EVT if (cls <0 || cls > GetNClasses() ) {
199 //EVT cls = GetNClasses();
200 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
201 //EVT}
202 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
203 //EVT workaround end
204
205 // get the variable vector of the current event
206 UInt_t inputSize = fGet.size();
207
208 std::vector<Float_t> input(0);
209 std::vector<Float_t> output(0);
210
211 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
212 GetInput( ev, input, mask, kTRUE );
213
214 std::vector<Char_t>::iterator itMask = mask.begin();
215
216 // TVectorD vec( inputSize );
217 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
218 Double_t invCumulant;
219 //transformation
220 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
221
222 if ( (*itMask) ){
223 ++itMask;
224 continue;
225 }
226
227 if (0 != fCumulativePDF[ivar][cls]) {
228 invCumulant = input.at(ivar);
229
230 // first de-gauss ist if gaussianized
231 if (!fFlatNotGauss)
232 invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
233
234 // then de-uniform the values
235 if(fTMVAVersion>TMVA_VERSION(4,0,0))
236 invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
237 else
238 Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
239
240 output.push_back(invCumulant);
241 }
242 }
243
244 if (fBackTransformedEvent==0) fBackTransformedEvent = new Event( *ev );
245
246 SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
247
248 return fBackTransformedEvent;
249}
250
251////////////////////////////////////////////////////////////////////////////////
252/// fill the cumulative distributions
253
254void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
255{
256 const UInt_t inputSize = fGet.size();
257 // const UInt_t nCls = GetNClasses();
258
259 // const UInt_t nvar = GetNVariables();
260 UInt_t nevt = events.size();
261
262 const UInt_t nClasses = GetNClasses();
263 UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes separately + one where all classes are treated (added) together
264
265 if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
266
267 UInt_t **nbins = new UInt_t*[numDist];
268
269 std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
270 std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
271 for (UInt_t i=0; i < numDist; i++) {
272 listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
273 vsForBinning[i] = new std::vector<Float_t> [inputSize];
274 nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
275 }
276
277 std::vector<Float_t> input;
278 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
279
280 // perform event loop
281 Float_t *sumOfWeights = new Float_t[numDist];
282 Float_t *minWeight = new Float_t[numDist];
283 Float_t *maxWeight = new Float_t[numDist];
284 for (UInt_t i=0; i<numDist; i++) {
285 sumOfWeights[i]=0;
286 minWeight[i]=10E10; // TODO: change this to std::max ?
287 maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
288 }
289 for (UInt_t ievt=0; ievt < nevt; ievt++) {
290 const Event* ev= events[ievt];
291 Int_t cls = ev->GetClass();
292 Float_t eventWeight = ev->GetWeight();
293 sumOfWeights[cls] += eventWeight;
294 if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
295 if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
296 if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
297
298 Bool_t hasMaskedEntries = GetInput( ev, input, mask );
299 if( hasMaskedEntries ){
300 Log() << kWARNING << "Incomplete event" << Endl;
301 std::ostringstream oss;
302 ev->Print(oss);
303 Log() << oss.str();
304 Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
305 }
306
307
308 Int_t ivar = 0;
309 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
310 Float_t value = (*itInput);
311 listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
312 if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
313 ++ivar;
314 }
315 }
316 if (numDist > 1) {
317 for (UInt_t icl=0; icl<numDist-1; icl++){
318 minWeight[numDist-1] = TMath::Min(minWeight[icl],minWeight[numDist-1]);
319 maxWeight[numDist-1] = TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
320 }
321 }
322
323 // Sorting the lists, getting nbins ...
324 const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
325 const UInt_t nbinsmax=2000; // maximum number of bins
326
327 for (UInt_t icl=0; icl< numDist; icl++){
328 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
329 listsForBinning[icl][ivar].sort();
330 std::list< TMVA::TMVAGaussPair >::iterator it;
331 Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
332 sumPerBin=TMath::Max(minWeight[icl]*nevmin,sumPerBin);
333 Float_t sum=0;
334 Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
335 Float_t lastev_value=ev_value;
336 const Float_t eps = 1.e-4;
337 vsForBinning[icl][ivar].push_back(ev_value-eps);
338 vsForBinning[icl][ivar].push_back(ev_value);
339
340 for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); ++it){
341 sum+= it->GetWeight();
342 if (sum >= sumPerBin) {
343 ev_value=it->GetValue();
344 if (ev_value>lastev_value) { // protection against bin width of 0
345 vsForBinning[icl][ivar].push_back(ev_value);
346 sum = 0.;
347 lastev_value=ev_value;
348 }
349 }
350 }
351 if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
352 nbins[icl][ivar] = vsForBinning[icl][ivar].size();
353 }
354 }
355
356 delete[] sumOfWeights;
357 delete[] minWeight;
358 delete[] maxWeight;
359
360 // create histogram for the cumulative distribution.
361 fCumulativeDist.resize(inputSize);
362 for (UInt_t icls = 0; icls < numDist; icls++) {
363 for (UInt_t ivar=0; ivar < inputSize; ivar++){
364 Float_t* binnings = new Float_t[nbins[icls][ivar]];
365 //the binning for this particular histogram:
366 for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
367 binnings[k] = vsForBinning[icls][ivar][k];
368 }
369 fCumulativeDist[ivar].resize(numDist);
370 if (0 != fCumulativeDist[ivar][icls] ) {
371 delete fCumulativeDist[ivar][icls];
372 }
373 fCumulativeDist[ivar][icls] = new TH1F(Form("Cumulative_Var%d_cls%d",ivar,icls),
374 Form("Cumulative_Var%d_cls%d",ivar,icls),
375 nbins[icls][ivar] -1, // class icls
376 binnings);
377 fCumulativeDist[ivar][icls]->SetDirectory(0);
378 delete [] binnings;
379 }
380 }
381
382 // Deallocation
383 for (UInt_t i=0; i<numDist; i++) {
384 delete [] listsForBinning[numDist-i-1];
385 delete [] vsForBinning[numDist-i-1];
386 delete [] nbins[numDist-i-1];
387 }
388 delete [] listsForBinning;
389 delete [] vsForBinning;
390 delete [] nbins;
391
392 // perform event loop
393 std::vector<Int_t> ic(numDist);
394 for (UInt_t ievt=0; ievt<nevt; ievt++) {
395
396 const Event* ev= events[ievt];
397 Int_t cls = ev->GetClass();
398 Float_t eventWeight = ev->GetWeight();
399
400 GetInput( ev, input, mask );
401
402 Int_t ivar = 0;
403 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
404 Float_t value = (*itInput);
405 fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
406 if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
407
408 ++ivar;
409 }
410 }
411
412 // clean up
413 CleanUpCumulativeArrays("PDF");
414
415 // now sum up in order to get the real cumulative distribution
416 Double_t sum = 0, total=0;
417 fCumulativePDF.resize(inputSize);
418 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
419 // fCumulativePDF.resize(ivar+1);
420 for (UInt_t icls=0; icls<numDist; icls++) {
421 (fCumulativeDist[ivar][icls])->Smooth();
422 sum = 0;
423 total = 0.;
424 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
425 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
426 if (val>0) total += val;
427 }
428 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
429 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
430 if (val>0) sum += val;
431 (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
432 }
433 // create PDf
434 fCumulativePDF[ivar].push_back(new PDF( Form("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
435 }
436 }
437}
438
439////////////////////////////////////////////////////////////////////////////////
440
442{
443 Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
444}
445
446////////////////////////////////////////////////////////////////////////////////
447/// clean up of cumulative arrays
448
450 if (opt == "ALL" || opt == "PDF"){
451 for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
452 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
453 if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
454 }
455 }
456 fCumulativePDF.clear();
457 }
458 if (opt == "ALL" || opt == "Dist"){
459 for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
460 for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
461 if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
462 }
463 }
464 fCumulativeDist.clear();
465 }
466}
467////////////////////////////////////////////////////////////////////////////////
468/// create XML description of Gauss transformation
469
471 void* trfxml = gTools().AddChild(parent, "Transform");
472 gTools().AddAttr(trfxml, "Name", "Gauss");
473 gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
474
476
477 UInt_t nvar = fGet.size();
478 for (UInt_t ivar=0; ivar<nvar; ivar++) {
479 void* varxml = gTools().AddChild( trfxml, "Variable");
480 // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
481 gTools().AddAttr( varxml, "VarIndex", ivar );
482
483 if ( fCumulativePDF[ivar][0]==0 ||
484 (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
485 Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
486
487 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
488 void* pdfxml = gTools().AddChild( varxml, Form("CumulativePDF_cls%d",icls));
489 (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
490 }
491 }
492}
493
494////////////////////////////////////////////////////////////////////////////////
495/// Read the transformation matrices from the xml node
496
498 // clean up first
499 CleanUpCumulativeArrays();
500 TString FlatOrGauss;
501
502 gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
503
504 if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
505 else fFlatNotGauss = kFALSE;
506
507 Bool_t newFormat = kFALSE;
508
509 void* inpnode = NULL;
510
511 inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
512 if( inpnode!=NULL )
513 newFormat = kTRUE; // new xml format
514
515 void* varnode = NULL;
516 if( newFormat ){
517 // ------------- new format --------------------
518 // read input
520
521 varnode = gTools().GetNextChild(inpnode);
522 }else
523 varnode = gTools().GetChild(trfnode);
524
525 // Read the cumulative distribution
526
527 TString varname, histname, classname;
528 UInt_t ivar;
529 while(varnode) {
530 if( gTools().HasAttr(varnode,"Name") )
531 gTools().ReadAttr(varnode, "Name", varname);
532 gTools().ReadAttr(varnode, "VarIndex", ivar);
533
534 void* clsnode = gTools().GetChild( varnode);
535
536 while(clsnode) {
537 void* pdfnode = gTools().GetChild( clsnode);
538 PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
539 pdfToRead->ReadXML(pdfnode); // pdfnode
540 // push_back PDF
541 fCumulativePDF.resize( ivar+1 );
542 fCumulativePDF[ivar].push_back(pdfToRead);
543 clsnode = gTools().GetNextChild(clsnode);
544 }
545
546 varnode = gTools().GetNextChild(varnode);
547 }
548 SetCreated();
549}
550
551////////////////////////////////////////////////////////////////////////////////
552/// Read the cumulative distribution
553
555{
556 Bool_t addDirStatus = TH1::AddDirectoryStatus();
557 TH1::AddDirectory(0); // this avoids the binding of the hists in TMVA::PDF to the current ROOT file
558 char buf[512];
559 istr.getline(buf,512);
560
561 TString strvar, dummy;
562
563 while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
564 char* p = buf;
565 while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
566 if (*p=='#' || *p=='\0') {
567 istr.getline(buf,512);
568 continue; // if comment or empty line, read the next line
569 }
570 std::stringstream sstr(buf);
571 sstr >> strvar;
572
573 if (strvar=="CumulativeHistogram") {
574 UInt_t type(0), ivar(0);
575 TString devnullS(""),hname("");
576 Int_t nbins(0);
577
578 // coverity[tainted_data_argument]
579 sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
580
581 Float_t *Binnings = new Float_t[nbins+1];
582 Float_t val;
583 istr >> devnullS; // read the line "BinBoundaries" ..
584 for (Int_t ibin=0; ibin<nbins+1; ibin++) {
585 istr >> val;
586 Binnings[ibin]=val;
587 }
588
589 if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
590 if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
591
592 TH1F * histToRead = fCumulativeDist[ivar][type];
593 if ( histToRead !=0 ) delete histToRead;
594 // recreate the cumulative histogram to be filled with the values read
595 histToRead = new TH1F( hname, hname, nbins, Binnings );
596 histToRead->SetDirectory(0);
597 fCumulativeDist[ivar][type]=histToRead;
598
599 istr >> devnullS; // read the line "BinContent" ..
600 for (Int_t ibin=0; ibin<nbins; ibin++) {
601 istr >> val;
602 histToRead->SetBinContent(ibin+1,val);
603 }
604
605 PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
606 // push_back PDF
607 fCumulativePDF.resize(ivar+1);
608 fCumulativePDF[ivar].resize(type+1);
609 fCumulativePDF[ivar][type] = pdf;
610 delete [] Binnings;
611 }
612
613 // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
614 if (strvar=="Uniform") { // don't correct this spelling mistake
615 sstr >> fFlatNotGauss;
616 istr.getline(buf,512);
617 break;
618 }
619
620 istr.getline(buf,512); // reading the next line
621 }
622 TH1::AddDirectory(addDirStatus);
623
624 UInt_t classIdx=(classname=="signal")?0:1;
625 for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
626 PDF* src = fCumulativePDF[ivar][classIdx];
627 fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
628 }
629
630 SetTMVAVersion(TMVA_VERSION(3,9,7));
631
632 SetCreated();
633}
634
635////////////////////////////////////////////////////////////////////////////////
636
638
639 Int_t bin = h->FindBin(x);
640 bin = TMath::Max(bin,1);
641 bin = TMath::Min(bin,h->GetNbinsX());
642
643 Double_t cumulant;
644 Double_t x0, x1, y0, y1;
645 Double_t total = h->GetNbinsX()*fElementsperbin;
646 Double_t supmin = 0.5/total;
647
648 x0 = h->GetBinLowEdge(TMath::Max(bin,1));
649 x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
650
651 y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
652 y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
653
654 if (bin == 0) {
655 y0 = supmin;
656 y1 = supmin;
657 }
658 if (bin == 1) {
659 y0 = supmin;
660 }
661 if (bin > h->GetNbinsX()) {
662 y0 = 1.-supmin;
663 y1 = 1.-supmin;
664 }
665 if (bin == h->GetNbinsX()) {
666 y1 = 1.-supmin;
667 }
668
669 if (x0 == x1) {
670 cumulant = y1;
671 } else {
672 cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
673 }
674
675 if (x <= h->GetBinLowEdge(1)){
676 cumulant = supmin;
677 }
678 if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
679 cumulant = 1-supmin;
680 }
681 return cumulant;
682}
683
684////////////////////////////////////////////////////////////////////////////////
685/// prints the transformation
686
688{
689 Int_t cls = 0;
690 Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
691 cls++;
692}
693
694////////////////////////////////////////////////////////////////////////////////
695/// creates the transformation function
696///
697
698void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TString& fcncName,
699 Int_t part, UInt_t trCounter, Int_t )
700{
701 const UInt_t nvar = fGet.size();
702 UInt_t numDist = GetNClasses() + 1;
703 Int_t nBins = -1;
704 for (UInt_t icls=0; icls<numDist; icls++) {
705 for (UInt_t ivar=0; ivar<nvar; ivar++) {
706 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
707 if (nbin > nBins) nBins=nbin;
708 }
709 }
710
711 // creates the gauss transformation function
712 if (part==1) {
713 fout << std::endl;
714 fout << " int nvar;" << std::endl;
715 fout << std::endl;
716 // declare variables
717 fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
718 fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
719 fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
720 fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
721 fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
722 }
723 if (part==2) {
724 fout << std::endl;
725 fout << "#include \"math.h\"" << std::endl;
726 fout << std::endl;
727 fout << "//_______________________________________________________________________" << std::endl;
728 fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
729 fout << "{" << std::endl;
730 fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
731 fout << " nvar=" << nvar << ";" << std::endl;
732 for (UInt_t icls=0; icls<numDist; icls++) {
733 for (UInt_t ivar=0; ivar<nvar; ivar++) {
734 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
735 fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
736 }
737 }
738
739 // fill meat here
740 // loop over nvar , cls, loop over nBins
741 // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
742 for (UInt_t icls=0; icls<numDist; icls++) {
743 for (UInt_t ivar=0; ivar<nvar; ivar++) {
744 // Int_t idx = 0;
745 try{
746 // idx = fGet.at(ivar).second;
747 Char_t type = fGet.at(ivar).first;
748 if( type != 'v' ){
749 Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
750 }
751 }catch( std::out_of_range &){
752 Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
753 }
754
755 // Double_t xmn=Variables()[idx].GetMin();
756 // Double_t xmx=Variables()[idx].GetMax();
757 Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
758 Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
759
760 fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
761 fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
762 for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
763 fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
764 fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
765
766 }
767 }
768 }
769 fout << "}" << std::endl;
770 fout << std::endl;
771 fout << "//_______________________________________________________________________" << std::endl;
772 fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
773 fout << "{" << std::endl;
774 fout << " // Gauss/Uniform transformation" << std::endl;
775 fout << " int cls=clsIn;" << std::endl;
776 fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
777 fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
778 fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
779 fout << " }"<< std::endl;
780
781 fout << " // copy the variables which are going to be transformed "<< std::endl;
782 VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 );
783 fout << " static std::vector<double> dv; "<< std::endl;
784 fout << " dv.resize(nvar); "<< std::endl;
785 fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
786 fout << " "<< std::endl;
787 fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
788 fout << " double cumulant; "<< std::endl;
789 fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
790 fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
791 fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
792 fout << " int ibin=0; "<< std::endl;
793 fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
794 fout << " "<< std::endl;
795 fout << " if (ibin<0) { ibin=0;} "<< std::endl;
796 fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
797 fout << " int nextbin = ibin; "<< std::endl;
798 fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
799 fout << " nextbin++; "<< std::endl;
800 fout << " else "<< std::endl;
801 fout << " nextbin--; "<< std::endl;
802 fout << " "<< std::endl;
803 fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
804 fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
805 fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
806 fout << " "<< std::endl;
807 fout << " "<< std::endl;
808 fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
809 fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
810 fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
811 fout << " else { "<< std::endl;
812 fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
813 fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
814 fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
815 fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
816 fout << " double inverf=0., stp=1. ; "<< std::endl;
817 fout << " while (stp >1.e-10){; "<< std::endl;
818 fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
819 fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
820 fout << " else inverf += stp; "<< std::endl;
821 fout << " } ; "<< std::endl;
822 fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
823 fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
824 fout << " } "<< std::endl;
825 fout << " } "<< std::endl;
826 fout << " // copy the transformed variables back "<< std::endl;
827 fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
828 fout << "} "<< std::endl;
829 }
830}
#define h(i)
Definition: RSha256.hxx:106
#define e(i)
Definition: RSha256.hxx:103
static const double x1[5]
static RooMathCoreReg dummy
int Int_t
Definition: RtypesCore.h:41
char Char_t
Definition: RtypesCore.h:29
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
float Float_t
Definition: RtypesCore.h:53
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
static unsigned int total
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
#define TMVA_VERSION(a, b, c)
Definition: Version.h:48
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
The TH1 histogram class.
Definition: TH1.h:56
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8381
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1226
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8666
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition: TH1.cxx:706
Class that contains all the data information.
Definition: DataSetInfo.h:60
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:309
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:382
UInt_t GetClass() const
Definition: Event.h:86
void Print(std::ostream &o) const
print method
Definition: Event.cxx:352
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:960
@ kSpline1
Definition: PDF.h:70
@ kSpline0
Definition: PDF.h:70
const char * GetName() const
Returns name of object.
Definition: PDF.h:116
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1136
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1245
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:337
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:355
Singleton class for Global types used by TMVA.
Definition: Types.h:73
Gaussian Transformation of input variables.
VariableGaussTransform(DataSetInfo &dsi, TString strcor="")
constructor can only be applied one after the other when they are created.
virtual void AttachXMLTo(void *parent)
create XML description of Gauss transformation
virtual void PrintTransformation(std::ostream &o)
prints the transformation
virtual void ReadFromXML(void *trfnode)
Read the transformation matrices from the xml node.
void WriteTransformationToStream(std::ostream &) const
virtual ~VariableGaussTransform(void)
destructor
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)
creates the transformation function
void ReadTransformationFromStream(std::istream &, const TString &)
Read the cumulative distribution.
virtual const Event * InverseTransform(const Event *const, Int_t cls) const
apply the inverse Gauss or inverse uniform transformation
void GetCumulativeDist(const std::vector< Event * > &)
fill the cumulative distributions
void CleanUpCumulativeArrays(TString opt="ALL")
clean up of cumulative arrays
virtual const Event * Transform(const Event *const, Int_t cls) const
apply the Gauss transformation
Bool_t PrepareTransformation(const std::vector< Event * > &)
calculate the cumulative distributions
Double_t OldCumulant(Float_t x, TH1 *h) const
Linear interpolation class.
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
void SetName(const TString &c)
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
Basic string class.
Definition: TString.h:131
Double_t x[n]
Definition: legend1.C:17
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:184
Double_t ErfInverse(Double_t x)
returns the inverse error function x must be <-1<x<1
Definition: TMath.cxx:203
Double_t Log(Double_t x)
Definition: TMath.h:750
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180
static long int sum(long int i)
Definition: Factory.cxx:2276
static void output(int code)
Definition: gifencode.c:226