Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
VariableGaussTransform.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : VariableGaussTransform *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * *
20 * Copyright (c) 2005-2011: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * U. of Bonn, Germany *
24 * *
25 * Redistribution and use in source and binary forms, with or without *
26 * modification, are permitted according to the terms listed in LICENSE *
27 * (http://tmva.sourceforge.net/LICENSE) *
28 **********************************************************************************/
29
30/*! \class TMVA::VariableGaussTransform
31\ingroup TMVA
32Gaussian Transformation of input variables.
33*/
34
36
37#include "TMVA/DataSetInfo.h"
38#include "TMVA/MsgLogger.h"
39#include "TMVA/PDF.h"
40#include "TMVA/Tools.h"
41#include "TMVA/Types.h"
42#include "TMVA/Version.h"
43
44#include "TH1F.h"
45#include "TMath.h"
46#include "TVectorF.h"
47#include "TVectorD.h"
48
49#include <exception>
50#include <iostream>
51#include <list>
52#include <limits>
53#include <stdexcept>
54
56
57////////////////////////////////////////////////////////////////////////////////
58/// constructor
59/// can only be applied one after the other when they are created. But in order to
60/// determine the Gauss transformation
61
63: VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
64 fFlatNotGauss(kFALSE),
65 fPdfMinSmooth(0),
66 fPdfMaxSmooth(0),
67 fElementsperbin(0)
68{
69 if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
70 SetName("Uniform");
71 }
72}
73
74////////////////////////////////////////////////////////////////////////////////
75/// destructor
76
78{
79 CleanUpCumulativeArrays();
80}
81
82////////////////////////////////////////////////////////////////////////////////
83
85{
86}
87
88////////////////////////////////////////////////////////////////////////////////
89/// calculate the cumulative distributions
90
92{
93 Initialize();
94
95 if (!IsEnabled() || IsCreated()) return kTRUE;
96
97 Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
98
99 UInt_t inputSize = fGet.size();
100 SetNVariables(inputSize);
101
102 if (inputSize > 200) {
103 Log() << kWARNING << "----------------------------------------------------------------------------"
104 << Endl;
105 Log() << kWARNING
106 << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
107 Log() << kWARNING << "----------------------------------------------------------------------------"
108 << Endl;
109 // return kFALSE;
110 }
111
112 GetCumulativeDist( events );
113
114 SetCreated( kTRUE );
115
116 return kTRUE;
117}
118
119////////////////////////////////////////////////////////////////////////////////
120/// apply the Gauss transformation
121
123{
124 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
125 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
126 //EVT if (cls <0 || cls > GetNClasses() ) {
127 //EVT cls = GetNClasses();
128 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
129 //EVT}
130 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
131 //EVT workaround end
132
133 // get the variable vector of the current event
134 UInt_t inputSize = fGet.size();
135
136 std::vector<Float_t> input(0);
137 std::vector<Float_t> output(0);
138
139 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
140 GetInput( ev, input, mask );
141
142 std::vector<Char_t>::iterator itMask = mask.begin();
143
144 // TVectorD vec( inputSize );
145 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
146 Double_t cumulant;
147 //transformation
148 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
149
150 if ( (*itMask) ){
151 ++itMask;
152 continue;
153 }
154
155 if (0 != fCumulativePDF[ivar][cls]) {
156 // first make it flat
157 if(fTMVAVersion>TMVA_VERSION(3,9,7))
158 cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
159 else
160 cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
161 cumulant = TMath::Min(cumulant,1.-10e-10);
162 cumulant = TMath::Max(cumulant,0.+10e-10);
163
164 if (fFlatNotGauss)
165 output.push_back( cumulant );
166 else {
167 // sanity correction for out-of-range values
168 Double_t maxErfInvArgRange = 0.99999999;
169 Double_t arg = 2.0*cumulant - 1.0;
170 arg = TMath::Min(+maxErfInvArgRange,arg);
171 arg = TMath::Max(-maxErfInvArgRange,arg);
172
173 output.push_back( 1.414213562*TMath::ErfInverse(arg) );
174 }
175 }
176 }
177
178 if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
179 if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
180 fTransformedEvent = new Event();
181 }
182
183 SetOutput( fTransformedEvent, output, mask, ev );
184
185 return fTransformedEvent;
186}
187
188////////////////////////////////////////////////////////////////////////////////
189/// apply the inverse Gauss or inverse uniform transformation
190
192{
193 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
194 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
195 //EVT if (cls <0 || cls > GetNClasses() ) {
196 //EVT cls = GetNClasses();
197 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
198 //EVT}
199 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
200 //EVT workaround end
201
202 // get the variable vector of the current event
203 UInt_t inputSize = fGet.size();
204
205 std::vector<Float_t> input(0);
206 std::vector<Float_t> output(0);
207
208 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
209 GetInput( ev, input, mask, kTRUE );
210
211 std::vector<Char_t>::iterator itMask = mask.begin();
212
213 // TVectorD vec( inputSize );
214 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
215 Double_t invCumulant;
216 //transformation
217 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
218
219 if ( (*itMask) ){
220 ++itMask;
221 continue;
222 }
223
224 if (0 != fCumulativePDF[ivar][cls]) {
225 invCumulant = input.at(ivar);
226
227 // first de-gauss ist if gaussianized
228 if (!fFlatNotGauss)
229 invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
230
231 // then de-uniform the values
232 if(fTMVAVersion>TMVA_VERSION(4,0,0))
233 invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
234 else
235 Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
236
237 output.push_back(invCumulant);
238 }
239 }
240
241 if (fBackTransformedEvent==0) fBackTransformedEvent = new Event( *ev );
242
243 SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
244
245 return fBackTransformedEvent;
246}
247
248////////////////////////////////////////////////////////////////////////////////
249/// fill the cumulative distributions
250
251void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
252{
253 const UInt_t inputSize = fGet.size();
254 // const UInt_t nCls = GetNClasses();
255
256 // const UInt_t nvar = GetNVariables();
257 UInt_t nevt = events.size();
258
259 const UInt_t nClasses = GetNClasses();
260 UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes separately + one where all classes are treated (added) together
261
262 if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
263
264 UInt_t **nbins = new UInt_t*[numDist];
265
266 std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
267 std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
268 for (UInt_t i=0; i < numDist; i++) {
269 listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
270 vsForBinning[i] = new std::vector<Float_t> [inputSize];
271 nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
272 }
273
274 std::vector<Float_t> input;
275 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
276
277 // perform event loop
278 Float_t *sumOfWeights = new Float_t[numDist];
279 Float_t *minWeight = new Float_t[numDist];
280 Float_t *maxWeight = new Float_t[numDist];
281 for (UInt_t i=0; i<numDist; i++) {
282 sumOfWeights[i]=0;
283 minWeight[i]=10E10; // TODO: change this to std::max ?
284 maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
285 }
286 for (UInt_t ievt=0; ievt < nevt; ievt++) {
287 const Event* ev= events[ievt];
288 Int_t cls = ev->GetClass();
289 Float_t eventWeight = ev->GetWeight();
290 sumOfWeights[cls] += eventWeight;
291 if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
292 if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
293 if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
294
295 Bool_t hasMaskedEntries = GetInput( ev, input, mask );
296 if( hasMaskedEntries ){
297 Log() << kWARNING << "Incomplete event" << Endl;
298 std::ostringstream oss;
299 ev->Print(oss);
300 Log() << oss.str();
301 Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
302 }
303
304
305 Int_t ivar = 0;
306 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
307 Float_t value = (*itInput);
308 listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
309 if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
310 ++ivar;
311 }
312 }
313 if (numDist > 1) {
314 for (UInt_t icl=0; icl<numDist-1; icl++){
315 minWeight[numDist-1] = TMath::Min(minWeight[icl],minWeight[numDist-1]);
316 maxWeight[numDist-1] = TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
317 }
318 }
319
320 // Sorting the lists, getting nbins ...
321 const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
322 const UInt_t nbinsmax=2000; // maximum number of bins
323
324 for (UInt_t icl=0; icl< numDist; icl++){
325 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
326 listsForBinning[icl][ivar].sort();
327 std::list< TMVA::TMVAGaussPair >::iterator it;
328 Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
329 sumPerBin=TMath::Max(minWeight[icl]*nevmin,sumPerBin);
330 Float_t sum=0;
331 Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
332 Float_t lastev_value=ev_value;
333 const Float_t eps = 1.e-4;
334 vsForBinning[icl][ivar].push_back(ev_value-eps);
335 vsForBinning[icl][ivar].push_back(ev_value);
336
337 for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); ++it){
338 sum+= it->GetWeight();
339 if (sum >= sumPerBin) {
340 ev_value=it->GetValue();
341 if (ev_value>lastev_value) { // protection against bin width of 0
342 vsForBinning[icl][ivar].push_back(ev_value);
343 sum = 0.;
344 lastev_value=ev_value;
345 }
346 }
347 }
348 if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
349 nbins[icl][ivar] = vsForBinning[icl][ivar].size();
350 }
351 }
352
353 delete[] sumOfWeights;
354 delete[] minWeight;
355 delete[] maxWeight;
356
357 // create histogram for the cumulative distribution.
358 fCumulativeDist.resize(inputSize);
359 for (UInt_t icls = 0; icls < numDist; icls++) {
360 for (UInt_t ivar=0; ivar < inputSize; ivar++){
361 Float_t* binnings = new Float_t[nbins[icls][ivar]];
362 //the binning for this particular histogram:
363 for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
364 binnings[k] = vsForBinning[icls][ivar][k];
365 }
366 fCumulativeDist[ivar].resize(numDist);
367 if (0 != fCumulativeDist[ivar][icls] ) {
368 delete fCumulativeDist[ivar][icls];
369 }
370 fCumulativeDist[ivar][icls] = new TH1F(Form("Cumulative_Var%d_cls%d",ivar,icls),
371 Form("Cumulative_Var%d_cls%d",ivar,icls),
372 nbins[icls][ivar] -1, // class icls
373 binnings);
374 fCumulativeDist[ivar][icls]->SetDirectory(0);
375 delete [] binnings;
376 }
377 }
378
379 // Deallocation
380 for (UInt_t i=0; i<numDist; i++) {
381 delete [] listsForBinning[numDist-i-1];
382 delete [] vsForBinning[numDist-i-1];
383 delete [] nbins[numDist-i-1];
384 }
385 delete [] listsForBinning;
386 delete [] vsForBinning;
387 delete [] nbins;
388
389 // perform event loop
390 std::vector<Int_t> ic(numDist);
391 for (UInt_t ievt=0; ievt<nevt; ievt++) {
392
393 const Event* ev= events[ievt];
394 Int_t cls = ev->GetClass();
395 Float_t eventWeight = ev->GetWeight();
396
397 GetInput( ev, input, mask );
398
399 Int_t ivar = 0;
400 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
401 Float_t value = (*itInput);
402 fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
403 if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
404
405 ++ivar;
406 }
407 }
408
409 // clean up
410 CleanUpCumulativeArrays("PDF");
411
412 // now sum up in order to get the real cumulative distribution
413 Double_t sum = 0, total=0;
414 fCumulativePDF.resize(inputSize);
415 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
416 // fCumulativePDF.resize(ivar+1);
417 for (UInt_t icls=0; icls<numDist; icls++) {
418 (fCumulativeDist[ivar][icls])->Smooth();
419 sum = 0;
420 total = 0.;
421 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
422 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
423 if (val>0) total += val;
424 }
425 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
426 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
427 if (val>0) sum += val;
428 (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
429 }
430 // create PDf
431 fCumulativePDF[ivar].push_back(new PDF( Form("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
432 }
433 }
434}
435
436////////////////////////////////////////////////////////////////////////////////
437
439{
440 Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
441}
442
443////////////////////////////////////////////////////////////////////////////////
444/// clean up of cumulative arrays
445
447 if (opt == "ALL" || opt == "PDF"){
448 for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
449 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
450 if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
451 }
452 }
453 fCumulativePDF.clear();
454 }
455 if (opt == "ALL" || opt == "Dist"){
456 for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
457 for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
458 if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
459 }
460 }
461 fCumulativeDist.clear();
462 }
463}
464////////////////////////////////////////////////////////////////////////////////
465/// create XML description of Gauss transformation
466
468 void* trfxml = gTools().AddChild(parent, "Transform");
469 gTools().AddAttr(trfxml, "Name", "Gauss");
470 gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
471
473
474 UInt_t nvar = fGet.size();
475 for (UInt_t ivar=0; ivar<nvar; ivar++) {
476 void* varxml = gTools().AddChild( trfxml, "Variable");
477 // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
478 gTools().AddAttr( varxml, "VarIndex", ivar );
479
480 if ( fCumulativePDF[ivar][0]==0 ||
481 (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
482 Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
483
484 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
485 void* pdfxml = gTools().AddChild( varxml, Form("CumulativePDF_cls%d",icls));
486 (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
487 }
488 }
489}
490
491////////////////////////////////////////////////////////////////////////////////
492/// Read the transformation matrices from the xml node
493
495 // clean up first
496 CleanUpCumulativeArrays();
497 TString FlatOrGauss;
498
499 gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
500
501 if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
502 else fFlatNotGauss = kFALSE;
503
504 Bool_t newFormat = kFALSE;
505
506 void* inpnode = NULL;
507
508 inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
509 if( inpnode!=NULL )
510 newFormat = kTRUE; // new xml format
511
512 void* varnode = NULL;
513 if( newFormat ){
514 // ------------- new format --------------------
515 // read input
517
518 varnode = gTools().GetNextChild(inpnode);
519 }else
520 varnode = gTools().GetChild(trfnode);
521
522 // Read the cumulative distribution
523
524 TString varname, histname, classname;
525 UInt_t ivar;
526 while(varnode) {
527 if( gTools().HasAttr(varnode,"Name") )
528 gTools().ReadAttr(varnode, "Name", varname);
529 gTools().ReadAttr(varnode, "VarIndex", ivar);
530
531 void* clsnode = gTools().GetChild( varnode);
532
533 while(clsnode) {
534 void* pdfnode = gTools().GetChild( clsnode);
535 PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
536 pdfToRead->ReadXML(pdfnode); // pdfnode
537 // push_back PDF
538 fCumulativePDF.resize( ivar+1 );
539 fCumulativePDF[ivar].push_back(pdfToRead);
540 clsnode = gTools().GetNextChild(clsnode);
541 }
542
543 varnode = gTools().GetNextChild(varnode);
544 }
545 SetCreated();
546}
547
548////////////////////////////////////////////////////////////////////////////////
549/// Read the cumulative distribution
550
552{
553 Bool_t addDirStatus = TH1::AddDirectoryStatus();
554 TH1::AddDirectory(0); // this avoids the binding of the hists in TMVA::PDF to the current ROOT file
555 char buf[512];
556 istr.getline(buf,512);
557
558 TString strvar, dummy;
559
560 while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
561 char* p = buf;
562 while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
563 if (*p=='#' || *p=='\0') {
564 istr.getline(buf,512);
565 continue; // if comment or empty line, read the next line
566 }
567 std::stringstream sstr(buf);
568 sstr >> strvar;
569
570 if (strvar=="CumulativeHistogram") {
571 UInt_t type(0), ivar(0);
572 TString devnullS(""),hname("");
573 Int_t nbins(0);
574
575 // coverity[tainted_data_argument]
576 sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
577
578 Float_t *Binnings = new Float_t[nbins+1];
579 Float_t val;
580 istr >> devnullS; // read the line "BinBoundaries" ..
581 for (Int_t ibin=0; ibin<nbins+1; ibin++) {
582 istr >> val;
583 Binnings[ibin]=val;
584 }
585
586 if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
587 if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
588
589 TH1F * histToRead = fCumulativeDist[ivar][type];
590 if ( histToRead !=0 ) delete histToRead;
591 // recreate the cumulative histogram to be filled with the values read
592 histToRead = new TH1F( hname, hname, nbins, Binnings );
593 histToRead->SetDirectory(0);
594 fCumulativeDist[ivar][type]=histToRead;
595
596 istr >> devnullS; // read the line "BinContent" ..
597 for (Int_t ibin=0; ibin<nbins; ibin++) {
598 istr >> val;
599 histToRead->SetBinContent(ibin+1,val);
600 }
601
602 PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
603 // push_back PDF
604 fCumulativePDF.resize(ivar+1);
605 fCumulativePDF[ivar].resize(type+1);
606 fCumulativePDF[ivar][type] = pdf;
607 delete [] Binnings;
608 }
609
610 // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
611 if (strvar=="Uniform") { // don't correct this spelling mistake
612 sstr >> fFlatNotGauss;
613 istr.getline(buf,512);
614 break;
615 }
616
617 istr.getline(buf,512); // reading the next line
618 }
619 TH1::AddDirectory(addDirStatus);
620
621 UInt_t classIdx=(classname=="signal")?0:1;
622 for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
623 PDF* src = fCumulativePDF[ivar][classIdx];
624 fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
625 }
626
627 SetTMVAVersion(TMVA_VERSION(3,9,7));
628
629 SetCreated();
630}
631
632////////////////////////////////////////////////////////////////////////////////
633
635
636 Int_t bin = h->FindBin(x);
637 bin = TMath::Max(bin,1);
638 bin = TMath::Min(bin,h->GetNbinsX());
639
640 Double_t cumulant;
641 Double_t x0, x1, y0, y1;
642 Double_t total = h->GetNbinsX()*fElementsperbin;
643 Double_t supmin = 0.5/total;
644
645 x0 = h->GetBinLowEdge(TMath::Max(bin,1));
646 x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
647
648 y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
649 y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
650
651 if (bin == 0) {
652 y0 = supmin;
653 y1 = supmin;
654 }
655 if (bin == 1) {
656 y0 = supmin;
657 }
658 if (bin > h->GetNbinsX()) {
659 y0 = 1.-supmin;
660 y1 = 1.-supmin;
661 }
662 if (bin == h->GetNbinsX()) {
663 y1 = 1.-supmin;
664 }
665
666 if (x0 == x1) {
667 cumulant = y1;
668 } else {
669 cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
670 }
671
672 if (x <= h->GetBinLowEdge(1)){
673 cumulant = supmin;
674 }
675 if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
676 cumulant = 1-supmin;
677 }
678 return cumulant;
679}
680
681////////////////////////////////////////////////////////////////////////////////
682/// prints the transformation
683
685{
686 Int_t cls = 0;
687 Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
688 cls++;
689}
690
691////////////////////////////////////////////////////////////////////////////////
692/// creates the transformation function
693///
694
695void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TString& fcncName,
696 Int_t part, UInt_t trCounter, Int_t )
697{
698 const UInt_t nvar = fGet.size();
699 UInt_t numDist = GetNClasses() + 1;
700 Int_t nBins = -1;
701 for (UInt_t icls=0; icls<numDist; icls++) {
702 for (UInt_t ivar=0; ivar<nvar; ivar++) {
703 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
704 if (nbin > nBins) nBins=nbin;
705 }
706 }
707
708 // creates the gauss transformation function
709 if (part==1) {
710 fout << std::endl;
711 fout << " int nvar;" << std::endl;
712 fout << std::endl;
713 // declare variables
714 fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
715 fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
716 fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
717 fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
718 fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
719 }
720 if (part==2) {
721 fout << std::endl;
722 fout << "#include \"math.h\"" << std::endl;
723 fout << std::endl;
724 fout << "//_______________________________________________________________________" << std::endl;
725 fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
726 fout << "{" << std::endl;
727 fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
728 fout << " nvar=" << nvar << ";" << std::endl;
729 for (UInt_t icls=0; icls<numDist; icls++) {
730 for (UInt_t ivar=0; ivar<nvar; ivar++) {
731 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
732 fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
733 }
734 }
735
736 // fill meat here
737 // loop over nvar , cls, loop over nBins
738 // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
739 for (UInt_t icls=0; icls<numDist; icls++) {
740 for (UInt_t ivar=0; ivar<nvar; ivar++) {
741 // Int_t idx = 0;
742 try{
743 // idx = fGet.at(ivar).second;
744 Char_t type = fGet.at(ivar).first;
745 if( type != 'v' ){
746 Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
747 }
748 }catch( std::out_of_range &){
749 Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
750 }
751
752 // Double_t xmn=Variables()[idx].GetMin();
753 // Double_t xmx=Variables()[idx].GetMax();
754 Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
755 Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
756
757 fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
758 fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
759 for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
760 fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
761 fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
762
763 }
764 }
765 }
766 fout << "}" << std::endl;
767 fout << std::endl;
768 fout << "//_______________________________________________________________________" << std::endl;
769 fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
770 fout << "{" << std::endl;
771 fout << " // Gauss/Uniform transformation" << std::endl;
772 fout << " int cls=clsIn;" << std::endl;
773 fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
774 fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
775 fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
776 fout << " }"<< std::endl;
777
778 fout << " // copy the variables which are going to be transformed "<< std::endl;
779 VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 );
780 fout << " static std::vector<double> dv; "<< std::endl;
781 fout << " dv.resize(nvar); "<< std::endl;
782 fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
783 fout << " "<< std::endl;
784 fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
785 fout << " double cumulant; "<< std::endl;
786 fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
787 fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
788 fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
789 fout << " int ibin=0; "<< std::endl;
790 fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
791 fout << " "<< std::endl;
792 fout << " if (ibin<0) { ibin=0;} "<< std::endl;
793 fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
794 fout << " int nextbin = ibin; "<< std::endl;
795 fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
796 fout << " nextbin++; "<< std::endl;
797 fout << " else "<< std::endl;
798 fout << " nextbin--; "<< std::endl;
799 fout << " "<< std::endl;
800 fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
801 fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
802 fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
803 fout << " "<< std::endl;
804 fout << " "<< std::endl;
805 fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
806 fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
807 fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
808 fout << " else { "<< std::endl;
809 fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
810 fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
811 fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
812 fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
813 fout << " double inverf=0., stp=1. ; "<< std::endl;
814 fout << " while (stp >1.e-10){; "<< std::endl;
815 fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
816 fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
817 fout << " else inverf += stp; "<< std::endl;
818 fout << " } ; "<< std::endl;
819 fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
820 fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
821 fout << " } "<< std::endl;
822 fout << " } "<< std::endl;
823 fout << " // copy the transformed variables back "<< std::endl;
824 fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
825 fout << "} "<< std::endl;
826 }
827}
#define h(i)
Definition RSha256.hxx:106
#define e(i)
Definition RSha256.hxx:103
static const double x1[5]
char Char_t
Definition RtypesCore.h:37
const Bool_t kFALSE
Definition RtypesCore.h:92
double Double_t
Definition RtypesCore.h:59
float Float_t
Definition RtypesCore.h:57
const Bool_t kTRUE
Definition RtypesCore.h:91
#define ClassImp(name)
Definition Rtypes.h:364
static unsigned int total
int type
Definition TGX11.cxx:121
char * Form(const char *fmt,...)
#define TMVA_VERSION(a, b, c)
Definition Version.h:48
1-D histogram with a float per channel (see TH1 documentation)}
Definition TH1.h:575
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:58
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition TH1.cxx:8777
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition TH1.cxx:1282
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition TH1.cxx:9062
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition TH1.cxx:750
Class that contains all the data information.
Definition DataSetInfo.h:62
UInt_t GetNVariables() const
accessor to the number of variables
Definition Event.cxx:308
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition Event.cxx:381
UInt_t GetClass() const
Definition Event.h:86
void Print(std::ostream &o) const
print method
Definition Event.cxx:351
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
void ReadXML(void *pdfnode)
XML file reading.
Definition PDF.cxx:961
@ kSpline1
Definition PDF.h:70
@ kSpline0
Definition PDF.h:70
const char * GetName() const
Returns name of object.
Definition PDF.h:116
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition Tools.cxx:1174
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition Tools.cxx:1136
TString StringFromDouble(Double_t d)
string tools
Definition Tools.cxx:1245
void * GetChild(void *parent, const char *childname=0)
get child node
Definition Tools.cxx:1162
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:335
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:353
Singleton class for Global types used by TMVA.
Definition Types.h:73
Gaussian Transformation of input variables.
VariableGaussTransform(DataSetInfo &dsi, TString strcor="")
constructor can only be applied one after the other when they are created.
virtual void AttachXMLTo(void *parent)
create XML description of Gauss transformation
virtual void PrintTransformation(std::ostream &o)
prints the transformation
virtual void ReadFromXML(void *trfnode)
Read the transformation matrices from the xml node.
void WriteTransformationToStream(std::ostream &) const
virtual ~VariableGaussTransform(void)
destructor
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)
creates the transformation function
void ReadTransformationFromStream(std::istream &, const TString &)
Read the cumulative distribution.
virtual const Event * InverseTransform(const Event *const, Int_t cls) const
apply the inverse Gauss or inverse uniform transformation
void GetCumulativeDist(const std::vector< Event * > &)
fill the cumulative distributions
void CleanUpCumulativeArrays(TString opt="ALL")
clean up of cumulative arrays
virtual const Event * Transform(const Event *const, Int_t cls) const
apply the Gauss transformation
Bool_t PrepareTransformation(const std::vector< Event * > &)
calculate the cumulative distributions
Double_t OldCumulant(Float_t x, TH1 *h) const
Linear interpolation class.
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
Basic string class.
Definition TString.h:136
Double_t x[n]
Definition legend1.C:17
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:158
Short_t Max(Short_t a, Short_t b)
Definition TMathBase.h:212
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition TMath.cxx:184
Double_t ErfInverse(Double_t x)
returns the inverse error function x must be <-1<x<1
Definition TMath.cxx:203
Short_t Min(Short_t a, Short_t b)
Definition TMathBase.h:180
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345
static void output(int code)
Definition gifencode.c:226