Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
VariableGaussTransform.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : VariableGaussTransform *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * *
20 * Copyright (c) 2005-2011: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * U. of Bonn, Germany *
24 * *
25 * Redistribution and use in source and binary forms, with or without *
26 * modification, are permitted according to the terms listed in LICENSE *
27 * (see tmva/doc/LICENSE) *
28 **********************************************************************************/
29
30/*! \class TMVA::VariableGaussTransform
31\ingroup TMVA
32Gaussian Transformation of input variables.
33*/
34
36
37#include "TMVA/DataSetInfo.h"
38#include "TMVA/MsgLogger.h"
39#include "TMVA/PDF.h"
40#include "TMVA/Tools.h"
41#include "TMVA/Types.h"
42#include "TMVA/Version.h"
43
44#include "TH1F.h"
45#include "TMath.h"
46#include "TVectorF.h"
47#include "TVectorD.h"
48
49#include <exception>
50#include <iostream>
51#include <list>
52#include <limits>
53#include <stdexcept>
54
55
56////////////////////////////////////////////////////////////////////////////////
57/// constructor
58/// can only be applied one after the other when they are created. But in order to
59/// determine the Gauss transformation
60
63 fFlatNotGauss(kFALSE),
64 fPdfMinSmooth(0),
65 fPdfMaxSmooth(0),
66 fElementsperbin(0)
67{
68 if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
69 SetName("Uniform");
70 }
71}
72
73////////////////////////////////////////////////////////////////////////////////
74/// destructor
75
77{
78 CleanUpCumulativeArrays();
79}
80
81////////////////////////////////////////////////////////////////////////////////
82
86
87////////////////////////////////////////////////////////////////////////////////
88/// calculate the cumulative distributions
89
91{
92 Initialize();
93
94 if (!IsEnabled() || IsCreated()) return kTRUE;
95
96 Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
97
98 UInt_t inputSize = fGet.size();
99 SetNVariables(inputSize);
100
101 if (inputSize > 200) {
102 Log() << kWARNING << "----------------------------------------------------------------------------"
103 << Endl;
104 Log() << kWARNING
105 << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
106 Log() << kWARNING << "----------------------------------------------------------------------------"
107 << Endl;
108 // return kFALSE;
109 }
110
111 GetCumulativeDist( events );
112
113 SetCreated( kTRUE );
114
115 return kTRUE;
116}
117
118////////////////////////////////////////////////////////////////////////////////
119/// apply the Gauss transformation
120
122{
123 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
124 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
125 //EVT if (cls <0 || cls > GetNClasses() ) {
126 //EVT cls = GetNClasses();
127 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
128 //EVT}
129 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
130 //EVT workaround end
131
132 // get the variable vector of the current event
133 UInt_t inputSize = fGet.size();
134
135 std::vector<Float_t> input(0);
136 std::vector<Float_t> output(0);
137
138 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
139 GetInput( ev, input, mask );
140
141 std::vector<Char_t>::iterator itMask = mask.begin();
142
143 // TVectorD vec( inputSize );
144 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
146 //transformation
147 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
148
149 if ( (*itMask) ){
150 ++itMask;
151 continue;
152 }
153
154 if (0 != fCumulativePDF[ivar][cls]) {
155 // first make it flat
156 if(fTMVAVersion>TMVA_VERSION(3,9,7))
157 cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
158 else
159 cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
160 cumulant = TMath::Min(cumulant,1.-10e-10);
161 cumulant = TMath::Max(cumulant,0.+10e-10);
162
163 if (fFlatNotGauss)
164 output.push_back( cumulant );
165 else {
166 // sanity correction for out-of-range values
167 Double_t maxErfInvArgRange = 0.99999999;
168 Double_t arg = 2.0*cumulant - 1.0;
169 arg = TMath::Min(+maxErfInvArgRange,arg);
170 arg = TMath::Max(-maxErfInvArgRange,arg);
171
172 output.push_back( 1.414213562*TMath::ErfInverse(arg) );
173 }
174 }
175 }
176
177 if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
178 if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
179 fTransformedEvent = new Event();
180 }
181
182 SetOutput( fTransformedEvent, output, mask, ev );
183
184 return fTransformedEvent;
185}
186
187////////////////////////////////////////////////////////////////////////////////
188/// apply the inverse Gauss or inverse uniform transformation
189
191{
192 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
193 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
194 //EVT if (cls <0 || cls > GetNClasses() ) {
195 //EVT cls = GetNClasses();
196 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
197 //EVT}
198 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
199 //EVT workaround end
200
201 // get the variable vector of the current event
202 UInt_t inputSize = fGet.size();
203
204 std::vector<Float_t> input(0);
205 std::vector<Float_t> output(0);
206
207 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
208 GetInput( ev, input, mask, kTRUE );
209
210 std::vector<Char_t>::iterator itMask = mask.begin();
211
212 // TVectorD vec( inputSize );
213 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
215 //transformation
216 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
217
218 if ( (*itMask) ){
219 ++itMask;
220 continue;
221 }
222
223 if (0 != fCumulativePDF[ivar][cls]) {
224 invCumulant = input.at(ivar);
225
226 // first de-gauss ist if gaussianized
227 if (!fFlatNotGauss)
228 invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
229
230 // then de-uniform the values
231 if(fTMVAVersion>TMVA_VERSION(4,0,0))
232 invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
233 else
234 Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
235
236 output.push_back(invCumulant);
237 }
238 }
239
240 if (fBackTransformedEvent==0) fBackTransformedEvent = new Event( *ev );
241
242 SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
243
244 return fBackTransformedEvent;
245}
246
247////////////////////////////////////////////////////////////////////////////////
248/// fill the cumulative distributions
249
250void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
251{
252 const UInt_t inputSize = fGet.size();
253 // const UInt_t nCls = GetNClasses();
254
255 // const UInt_t nvar = GetNVariables();
256 UInt_t nevt = events.size();
257
258 const UInt_t nClasses = GetNClasses();
259 UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes separately + one where all classes are treated (added) together
260
261 if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
262
263 UInt_t **nbins = new UInt_t*[numDist];
264
265 std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
266 std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
267 for (UInt_t i=0; i < numDist; i++) {
268 listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
269 vsForBinning[i] = new std::vector<Float_t> [inputSize];
270 nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
271 }
272
273 std::vector<Float_t> input;
274 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
275
276 // perform event loop
280 for (UInt_t i=0; i<numDist; i++) {
281 sumOfWeights[i]=0;
282 minWeight[i]=10E10; // TODO: change this to std::max ?
283 maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
284 }
285 for (UInt_t ievt=0; ievt < nevt; ievt++) {
286 const Event* ev= events[ievt];
287 Int_t cls = ev->GetClass();
288 Float_t eventWeight = ev->GetWeight();
289 sumOfWeights[cls] += eventWeight;
290 if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
291 if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
292 if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
293
294 Bool_t hasMaskedEntries = GetInput( ev, input, mask );
295 if( hasMaskedEntries ){
296 Log() << kWARNING << "Incomplete event" << Endl;
297 std::ostringstream oss;
298 ev->Print(oss);
299 Log() << oss.str();
300 Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
301 }
302
303
304 Int_t ivar = 0;
305 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
306 Float_t value = (*itInput);
307 listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
308 if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
309 ++ivar;
310 }
311 }
312 if (numDist > 1) {
313 for (UInt_t icl=0; icl<numDist-1; icl++){
316 }
317 }
318
319 // Sorting the lists, getting nbins ...
320 const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
321 const UInt_t nbinsmax=2000; // maximum number of bins
322
323 for (UInt_t icl=0; icl< numDist; icl++){
324 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
325 listsForBinning[icl][ivar].sort();
326 std::list< TMVA::TMVAGaussPair >::iterator it;
329 Float_t sum=0;
332 const Float_t eps = 1.e-4;
333 vsForBinning[icl][ivar].push_back(ev_value-eps);
334 vsForBinning[icl][ivar].push_back(ev_value);
335
336 for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); ++it){
337 sum+= it->GetWeight();
338 if (sum >= sumPerBin) {
339 ev_value=it->GetValue();
340 if (ev_value>lastev_value) { // protection against bin width of 0
341 vsForBinning[icl][ivar].push_back(ev_value);
342 sum = 0.;
344 }
345 }
346 }
347 if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
348 nbins[icl][ivar] = vsForBinning[icl][ivar].size();
349 }
350 }
351
352 delete[] sumOfWeights;
353 delete[] minWeight;
354 delete[] maxWeight;
355
356 // create histogram for the cumulative distribution.
357 fCumulativeDist.resize(inputSize);
358 for (UInt_t icls = 0; icls < numDist; icls++) {
359 for (UInt_t ivar=0; ivar < inputSize; ivar++){
360 Float_t* binnings = new Float_t[nbins[icls][ivar]];
361 //the binning for this particular histogram:
362 for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
363 binnings[k] = vsForBinning[icls][ivar][k];
364 }
365 fCumulativeDist[ivar].resize(numDist);
366 if (fCumulativeDist[ivar][icls] ) {
367 delete fCumulativeDist[ivar][icls];
368 }
369 fCumulativeDist[ivar][icls] = new TH1F(TString::Format("Cumulative_Var%d_cls%d",ivar,icls),
370 TString::Format("Cumulative_Var%d_cls%d",ivar,icls),
371 nbins[icls][ivar] -1, // class icls
372 binnings);
373 fCumulativeDist[ivar][icls]->SetDirectory(nullptr);
374 delete [] binnings;
375 }
376 }
377
378 // Deallocation
379 for (UInt_t i=0; i<numDist; i++) {
380 delete [] listsForBinning[numDist-i-1];
381 delete [] vsForBinning[numDist-i-1];
382 delete [] nbins[numDist-i-1];
383 }
384 delete [] listsForBinning;
385 delete [] vsForBinning;
386 delete [] nbins;
387
388 // perform event loop
389 std::vector<Int_t> ic(numDist);
390 for (UInt_t ievt=0; ievt<nevt; ievt++) {
391
392 const Event* ev= events[ievt];
393 Int_t cls = ev->GetClass();
394 Float_t eventWeight = ev->GetWeight();
395
396 GetInput( ev, input, mask );
397
398 Int_t ivar = 0;
399 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
400 Float_t value = (*itInput);
401 fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
402 if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
403
404 ++ivar;
405 }
406 }
407
408 // clean up
409 CleanUpCumulativeArrays("PDF");
410
411 // now sum up in order to get the real cumulative distribution
412 Double_t sum = 0, total=0;
413 fCumulativePDF.resize(inputSize);
414 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
415 // fCumulativePDF.resize(ivar+1);
416 for (UInt_t icls=0; icls<numDist; icls++) {
417 (fCumulativeDist[ivar][icls])->Smooth();
418 sum = 0;
419 total = 0.;
420 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
421 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
422 if (val>0) total += val;
423 }
424 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
425 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
426 if (val>0) sum += val;
427 (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
428 }
429 // create PDf
430 fCumulativePDF[ivar].push_back(new PDF( TString::Format("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
431 }
432 }
433}
434
435////////////////////////////////////////////////////////////////////////////////
436
438{
439 Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
440}
441
442////////////////////////////////////////////////////////////////////////////////
443/// clean up of cumulative arrays
444
446 if (opt == "ALL" || opt == "PDF"){
447 for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
448 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
449 if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
450 }
451 }
452 fCumulativePDF.clear();
453 }
454 if (opt == "ALL" || opt == "Dist"){
455 for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
456 for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
457 if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
458 }
459 }
460 fCumulativeDist.clear();
461 }
462}
463////////////////////////////////////////////////////////////////////////////////
464/// create XML description of Gauss transformation
465
467 void* trfxml = gTools().AddChild(parent, "Transform");
468 gTools().AddAttr(trfxml, "Name", "Gauss");
469 gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
470
472
473 UInt_t nvar = fGet.size();
474 for (UInt_t ivar=0; ivar<nvar; ivar++) {
475 void* varxml = gTools().AddChild( trfxml, "Variable");
476 // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
477 gTools().AddAttr( varxml, "VarIndex", ivar );
478
479 if ( fCumulativePDF[ivar][0]==0 ||
480 (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
481 Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
482
483 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
484 void* pdfxml = gTools().AddChild( varxml, TString::Format("CumulativePDF_cls%d",icls));
485 (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
486 }
487 }
488}
489
490////////////////////////////////////////////////////////////////////////////////
491/// Read the transformation matrices from the xml node
492
494 // clean up first
495 CleanUpCumulativeArrays();
497
498 gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
499
500 if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
501 else fFlatNotGauss = kFALSE;
502
504
505 void* inpnode = NULL;
506
507 inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
508 if( inpnode!=NULL )
509 newFormat = kTRUE; // new xml format
510
511 void* varnode = NULL;
512 if( newFormat ){
513 // ------------- new format --------------------
514 // read input
516
518 }else
520
521 // Read the cumulative distribution
522
523 TString varname, histname, classname;
524 UInt_t ivar;
525 while(varnode) {
526 if( gTools().HasAttr(varnode,"Name") )
527 gTools().ReadAttr(varnode, "Name", varname);
528 gTools().ReadAttr(varnode, "VarIndex", ivar);
529
530 void* clsnode = gTools().GetChild( varnode);
531
532 while(clsnode) {
533 void* pdfnode = gTools().GetChild( clsnode);
534 PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
535 pdfToRead->ReadXML(pdfnode); // pdfnode
536 // push_back PDF
537 fCumulativePDF.resize( ivar+1 );
538 fCumulativePDF[ivar].push_back(pdfToRead);
540 }
541
543 }
544 SetCreated();
545}
546
547////////////////////////////////////////////////////////////////////////////////
548/// Read the cumulative distribution
549
551{
553 TH1::AddDirectory(0); // this avoids the binding of the hists in TMVA::PDF to the current ROOT file
554 char buf[512];
555 istr.getline(buf,512);
556
557 TString strvar, dummy;
558
559 while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
560 char* p = buf;
561 while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
562 if (*p=='#' || *p=='\0') {
563 istr.getline(buf,512);
564 continue; // if comment or empty line, read the next line
565 }
566 std::stringstream sstr(buf);
567 sstr >> strvar;
568
569 if (strvar=="CumulativeHistogram") {
570 UInt_t type(0), ivar(0);
571 TString devnullS(""),hname("");
572 Int_t nbins(0);
573
574 // coverity[tainted_data_argument]
575 sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
576
577 Float_t *Binnings = new Float_t[nbins+1];
578 Float_t val;
579 istr >> devnullS; // read the line "BinBoundaries" ..
580 for (Int_t ibin=0; ibin<nbins+1; ibin++) {
581 istr >> val;
582 Binnings[ibin]=val;
583 }
584
585 if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
586 if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
587
588 TH1F * histToRead = fCumulativeDist[ivar][type];
589 if ( histToRead !=0 ) delete histToRead;
590 // recreate the cumulative histogram to be filled with the values read
591 histToRead = new TH1F( hname, hname, nbins, Binnings );
592 histToRead->SetDirectory(nullptr);
593 fCumulativeDist[ivar][type]=histToRead;
594
595 istr >> devnullS; // read the line "BinContent" ..
596 for (Int_t ibin=0; ibin<nbins; ibin++) {
597 istr >> val;
598 histToRead->SetBinContent(ibin+1,val);
599 }
600
601 PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
602 // push_back PDF
603 fCumulativePDF.resize(ivar+1);
604 fCumulativePDF[ivar].resize(type+1);
605 fCumulativePDF[ivar][type] = pdf;
606 delete [] Binnings;
607 }
608
609 // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
610 if (strvar=="Uniform") { // don't correct this spelling mistake
611 sstr >> fFlatNotGauss;
612 istr.getline(buf,512);
613 break;
614 }
615
616 istr.getline(buf,512); // reading the next line
617 }
619
620 UInt_t classIdx=(classname=="signal")?0:1;
621 for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
622 PDF* src = fCumulativePDF[ivar][classIdx];
623 fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
624 }
625
626 SetTMVAVersion(TMVA_VERSION(3,9,7));
627
628 SetCreated();
629}
630
631////////////////////////////////////////////////////////////////////////////////
632
634
635 Int_t bin = h->FindBin(x);
636 bin = TMath::Max(bin,1);
637 bin = TMath::Min(bin,h->GetNbinsX());
638
640 Double_t x0, x1, y0, y1;
641 Double_t total = h->GetNbinsX()*fElementsperbin;
642 Double_t supmin = 0.5/total;
643
644 x0 = h->GetBinLowEdge(TMath::Max(bin,1));
645 x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
646
647 y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
648 y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
649
650 if (bin == 0) {
651 y0 = supmin;
652 y1 = supmin;
653 }
654 if (bin == 1) {
655 y0 = supmin;
656 }
657 if (bin > h->GetNbinsX()) {
658 y0 = 1.-supmin;
659 y1 = 1.-supmin;
660 }
661 if (bin == h->GetNbinsX()) {
662 y1 = 1.-supmin;
663 }
664
665 if (x0 == x1) {
666 cumulant = y1;
667 } else {
668 cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
669 }
670
671 if (x <= h->GetBinLowEdge(1)){
673 }
674 if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
675 cumulant = 1-supmin;
676 }
677 return cumulant;
678}
679
680////////////////////////////////////////////////////////////////////////////////
681/// prints the transformation
682
684{
685 Int_t cls = 0;
686 Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
687 cls++;
688}
689
690////////////////////////////////////////////////////////////////////////////////
691/// creates the transformation function
692///
693
696{
697 const UInt_t nvar = fGet.size();
698 UInt_t numDist = GetNClasses() + 1;
699 Int_t nBins = -1;
700 for (UInt_t icls=0; icls<numDist; icls++) {
701 for (UInt_t ivar=0; ivar<nvar; ivar++) {
702 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
703 if (nbin > nBins) nBins=nbin;
704 }
705 }
706
707 // creates the gauss transformation function
708 if (part==1) {
709 fout << std::endl;
710 fout << " int nvar;" << std::endl;
711 fout << std::endl;
712 // declare variables
713 fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
714 fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
715 fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
716 fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
717 fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
718 }
719 if (part==2) {
720 fout << std::endl;
721 fout << "#include \"math.h\"" << std::endl;
722 fout << std::endl;
723 fout << "//_______________________________________________________________________" << std::endl;
724 fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
725 fout << "{" << std::endl;
726 fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
727 fout << " nvar=" << nvar << ";" << std::endl;
728 for (UInt_t icls=0; icls<numDist; icls++) {
729 for (UInt_t ivar=0; ivar<nvar; ivar++) {
730 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
731 fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
732 }
733 }
734
735 // fill meat here
736 // loop over nvar , cls, loop over nBins
737 // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
738 for (UInt_t icls=0; icls<numDist; icls++) {
739 for (UInt_t ivar=0; ivar<nvar; ivar++) {
740 // Int_t idx = 0;
741 try{
742 // idx = fGet.at(ivar).second;
743 Char_t type = fGet.at(ivar).first;
744 if( type != 'v' ){
745 Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
746 }
747 }catch( std::out_of_range &){
748 Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
749 }
750
751 // Double_t xmn=Variables()[idx].GetMin();
752 // Double_t xmx=Variables()[idx].GetMax();
753 Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
754 Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
755
756 fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
757 fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
758 for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
759 fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
760 fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
761
762 }
763 }
764 }
765 fout << "}" << std::endl;
766 fout << std::endl;
767 fout << "//_______________________________________________________________________" << std::endl;
768 fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
769 fout << "{" << std::endl;
770 fout << " // Gauss/Uniform transformation" << std::endl;
771 fout << " int cls=clsIn;" << std::endl;
772 fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
773 fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
774 fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
775 fout << " }"<< std::endl;
776
777 fout << " // copy the variables which are going to be transformed "<< std::endl;
779 fout << " static std::vector<double> dv; "<< std::endl;
780 fout << " dv.resize(nvar); "<< std::endl;
781 fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
782 fout << " "<< std::endl;
783 fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
784 fout << " double cumulant; "<< std::endl;
785 fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
786 fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
787 fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
788 fout << " int ibin=0; "<< std::endl;
789 fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
790 fout << " "<< std::endl;
791 fout << " if (ibin<0) { ibin=0;} "<< std::endl;
792 fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
793 fout << " int nextbin = ibin; "<< std::endl;
794 fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
795 fout << " nextbin++; "<< std::endl;
796 fout << " else "<< std::endl;
797 fout << " nextbin--; "<< std::endl;
798 fout << " "<< std::endl;
799 fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
800 fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
801 fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
802 fout << " "<< std::endl;
803 fout << " "<< std::endl;
804 fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
805 fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
806 fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
807 fout << " else { "<< std::endl;
808 fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
809 fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
810 fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
811 fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
812 fout << " double inverf=0., stp=1. ; "<< std::endl;
813 fout << " while (stp >1.e-10){; "<< std::endl;
814 fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
815 fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
816 fout << " else inverf += stp; "<< std::endl;
817 fout << " } ; "<< std::endl;
818 fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
819 fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
820 fout << " } "<< std::endl;
821 fout << " } "<< std::endl;
822 fout << " // copy the transformed variables back "<< std::endl;
823 fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
824 fout << "} "<< std::endl;
825 }
826}
#define h(i)
Definition RSha256.hxx:106
#define e(i)
Definition RSha256.hxx:103
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
char Char_t
Character 1 byte (char)
Definition RtypesCore.h:51
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
static unsigned int total
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char y1
Double_t GetBinContent(Int_t bin) const override
#define TMVA_VERSION(a, b, c)
Definition Version.h:48
const_iterator begin() const
const_iterator end() const
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:879
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition TH1.cxx:1263
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition TH1.cxx:741
Class that contains all the data information.
Definition DataSetInfo.h:62
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
@ kSpline1
Definition PDF.h:70
@ kSpline0
Definition PDF.h:70
TString StringFromDouble(Double_t d)
string tools
Definition Tools.cxx:1233
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1150
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1124
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1162
Singleton class for Global types used by TMVA.
Definition Types.h:71
VariableGaussTransform(DataSetInfo &dsi, TString strcor="")
constructor can only be applied one after the other when they are created.
void ReadFromXML(void *trfnode) override
Read the transformation matrices from the xml node.
const Event * InverseTransform(const Event *const, Int_t cls) const override
apply the inverse Gauss or inverse uniform transformation
void AttachXMLTo(void *parent) override
create XML description of Gauss transformation
const Event * Transform(const Event *const, Int_t cls) const override
apply the Gauss transformation
Bool_t PrepareTransformation(const std::vector< Event * > &) override
calculate the cumulative distributions
virtual ~VariableGaussTransform(void)
destructor
void WriteTransformationToStream(std::ostream &) const override
void GetCumulativeDist(const std::vector< Event * > &)
fill the cumulative distributions
void ReadTransformationFromStream(std::istream &, const TString &) override
Read the cumulative distribution.
void PrintTransformation(std::ostream &o) override
prints the transformation
void CleanUpCumulativeArrays(TString opt="ALL")
clean up of cumulative arrays
void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls) override
creates the transformation function
Double_t OldCumulant(Float_t x, TH1 *h) const
Linear interpolation class.
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
Basic string class.
Definition TString.h:138
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
Double_t x[n]
Definition legend1.C:17
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:251
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition TMath.cxx:190
Double_t ErfInverse(Double_t x)
Returns the inverse error function.
Definition TMath.cxx:208
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:199
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2339
static void output()