Logo ROOT  
Reference Guide
VariableGaussTransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariableGaussTransform *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2005-2011: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * U. of Bonn, Germany *
24  * *
25  * Redistribution and use in source and binary forms, with or without *
26  * modification, are permitted according to the terms listed in LICENSE *
27  * (http://tmva.sourceforge.net/LICENSE) *
28  **********************************************************************************/
29 
30 /*! \class TMVA::VariableGaussTransform
31 \ingroup TMVA
32 Gaussian Transformation of input variables.
33 */
34 
36 
37 #include "TMVA/DataSetInfo.h"
38 #include "TMVA/MsgLogger.h"
39 #include "TMVA/PDF.h"
40 #include "TMVA/Tools.h"
41 #include "TMVA/Types.h"
42 #include "TMVA/Version.h"
43 
44 #include "TH1F.h"
45 #include "TMath.h"
46 #include "TVectorF.h"
47 #include "TVectorD.h"
48 
49 #include <exception>
50 #include <iostream>
51 #include <list>
52 #include <limits>
53 #include <stdexcept>
54 
56 
57 ////////////////////////////////////////////////////////////////////////////////
58 /// constructor
59 /// can only be applied one after the other when they are created. But in order to
60 /// determine the Gauss transformation
61 
63 : VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
64  fFlatNotGauss(kFALSE),
65  fPdfMinSmooth(0),
66  fPdfMaxSmooth(0),
67  fElementsperbin(0)
68 {
69  if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
70  SetName("Uniform");
71  }
72 }
73 
74 ////////////////////////////////////////////////////////////////////////////////
75 /// destructor
76 
78 {
79  CleanUpCumulativeArrays();
80 }
81 
82 ////////////////////////////////////////////////////////////////////////////////
83 
85 {
86 }
87 
88 ////////////////////////////////////////////////////////////////////////////////
89 /// calculate the cumulative distributions
90 
92 {
93  Initialize();
94 
95  if (!IsEnabled() || IsCreated()) return kTRUE;
96 
97  Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
98 
99  UInt_t inputSize = fGet.size();
100  SetNVariables(inputSize);
101 
102  if (inputSize > 200) {
103  Log() << kWARNING << "----------------------------------------------------------------------------"
104  << Endl;
105  Log() << kWARNING
106  << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
107  Log() << kWARNING << "----------------------------------------------------------------------------"
108  << Endl;
109  // return kFALSE;
110  }
111 
112  GetCumulativeDist( events );
113 
114  SetCreated( kTRUE );
115 
116  return kTRUE;
117 }
118 
119 ////////////////////////////////////////////////////////////////////////////////
120 /// apply the Gauss transformation
121 
123 {
124  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
125  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
126  //EVT if (cls <0 || cls > GetNClasses() ) {
127  //EVT cls = GetNClasses();
128  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
129  //EVT}
130  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
131  //EVT workaround end
132 
133  // get the variable vector of the current event
134  UInt_t inputSize = fGet.size();
135 
136  std::vector<Float_t> input(0);
137  std::vector<Float_t> output(0);
138 
139  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
140  GetInput( ev, input, mask );
141 
142  std::vector<Char_t>::iterator itMask = mask.begin();
143 
144  // TVectorD vec( inputSize );
145  // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
146  Double_t cumulant;
147  //transformation
148  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
149 
150  if ( (*itMask) ){
151  ++itMask;
152  continue;
153  }
154 
155  if (0 != fCumulativePDF[ivar][cls]) {
156  // first make it flat
157  if(fTMVAVersion>TMVA_VERSION(3,9,7))
158  cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
159  else
160  cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
161  cumulant = TMath::Min(cumulant,1.-10e-10);
162  cumulant = TMath::Max(cumulant,0.+10e-10);
163 
164  if (fFlatNotGauss)
165  output.push_back( cumulant );
166  else {
167  // sanity correction for out-of-range values
168  Double_t maxErfInvArgRange = 0.99999999;
169  Double_t arg = 2.0*cumulant - 1.0;
170  arg = TMath::Min(+maxErfInvArgRange,arg);
171  arg = TMath::Max(-maxErfInvArgRange,arg);
172 
173  output.push_back( 1.414213562*TMath::ErfInverse(arg) );
174  }
175  }
176  }
177 
178  if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
179  if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
180  fTransformedEvent = new Event();
181  }
182 
183  SetOutput( fTransformedEvent, output, mask, ev );
184 
185  return fTransformedEvent;
186 }
187 
188 ////////////////////////////////////////////////////////////////////////////////
189 /// apply the inverse Gauss or inverse uniform transformation
190 
192 {
193  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
194  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
195  //EVT if (cls <0 || cls > GetNClasses() ) {
196  //EVT cls = GetNClasses();
197  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
198  //EVT}
199  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
200  //EVT workaround end
201 
202  // get the variable vector of the current event
203  UInt_t inputSize = fGet.size();
204 
205  std::vector<Float_t> input(0);
206  std::vector<Float_t> output(0);
207 
208  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
209  GetInput( ev, input, mask, kTRUE );
210 
211  std::vector<Char_t>::iterator itMask = mask.begin();
212 
213  // TVectorD vec( inputSize );
214  // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
215  Double_t invCumulant;
216  //transformation
217  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
218 
219  if ( (*itMask) ){
220  ++itMask;
221  continue;
222  }
223 
224  if (0 != fCumulativePDF[ivar][cls]) {
225  invCumulant = input.at(ivar);
226 
227  // first de-gauss ist if gaussianized
228  if (!fFlatNotGauss)
229  invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
230 
231  // then de-uniform the values
232  if(fTMVAVersion>TMVA_VERSION(4,0,0))
233  invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
234  else
235  Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
236 
237  output.push_back(invCumulant);
238  }
239  }
240 
241  if (fBackTransformedEvent==0) fBackTransformedEvent = new Event( *ev );
242 
243  SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
244 
245  return fBackTransformedEvent;
246 }
247 
248 ////////////////////////////////////////////////////////////////////////////////
249 /// fill the cumulative distributions
250 
251 void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
252 {
253  const UInt_t inputSize = fGet.size();
254  // const UInt_t nCls = GetNClasses();
255 
256  // const UInt_t nvar = GetNVariables();
257  UInt_t nevt = events.size();
258 
259  const UInt_t nClasses = GetNClasses();
260  UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes separately + one where all classes are treated (added) together
261 
262  if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
263 
264  UInt_t **nbins = new UInt_t*[numDist];
265 
266  std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
267  std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
268  for (UInt_t i=0; i < numDist; i++) {
269  listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
270  vsForBinning[i] = new std::vector<Float_t> [inputSize];
271  nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
272  }
273 
274  std::vector<Float_t> input;
275  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
276 
277  // perform event loop
278  Float_t *sumOfWeights = new Float_t[numDist];
279  Float_t *minWeight = new Float_t[numDist];
280  Float_t *maxWeight = new Float_t[numDist];
281  for (UInt_t i=0; i<numDist; i++) {
282  sumOfWeights[i]=0;
283  minWeight[i]=10E10; // TODO: change this to std::max ?
284  maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
285  }
286  for (UInt_t ievt=0; ievt < nevt; ievt++) {
287  const Event* ev= events[ievt];
288  Int_t cls = ev->GetClass();
289  Float_t eventWeight = ev->GetWeight();
290  sumOfWeights[cls] += eventWeight;
291  if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
292  if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
293  if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
294 
295  Bool_t hasMaskedEntries = GetInput( ev, input, mask );
296  if( hasMaskedEntries ){
297  Log() << kWARNING << "Incomplete event" << Endl;
298  std::ostringstream oss;
299  ev->Print(oss);
300  Log() << oss.str();
301  Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
302  }
303 
304 
305  Int_t ivar = 0;
306  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
307  Float_t value = (*itInput);
308  listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
309  if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
310  ++ivar;
311  }
312  }
313  if (numDist > 1) {
314  for (UInt_t icl=0; icl<numDist-1; icl++){
315  minWeight[numDist-1] = TMath::Min(minWeight[icl],minWeight[numDist-1]);
316  maxWeight[numDist-1] = TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
317  }
318  }
319 
320  // Sorting the lists, getting nbins ...
321  const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
322  const UInt_t nbinsmax=2000; // maximum number of bins
323 
324  for (UInt_t icl=0; icl< numDist; icl++){
325  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
326  listsForBinning[icl][ivar].sort();
327  std::list< TMVA::TMVAGaussPair >::iterator it;
328  Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
329  sumPerBin=TMath::Max(minWeight[icl]*nevmin,sumPerBin);
330  Float_t sum=0;
331  Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
332  Float_t lastev_value=ev_value;
333  const Float_t eps = 1.e-4;
334  vsForBinning[icl][ivar].push_back(ev_value-eps);
335  vsForBinning[icl][ivar].push_back(ev_value);
336 
337  for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); ++it){
338  sum+= it->GetWeight();
339  if (sum >= sumPerBin) {
340  ev_value=it->GetValue();
341  if (ev_value>lastev_value) { // protection against bin width of 0
342  vsForBinning[icl][ivar].push_back(ev_value);
343  sum = 0.;
344  lastev_value=ev_value;
345  }
346  }
347  }
348  if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
349  nbins[icl][ivar] = vsForBinning[icl][ivar].size();
350  }
351  }
352 
353  delete[] sumOfWeights;
354  delete[] minWeight;
355  delete[] maxWeight;
356 
357  // create histogram for the cumulative distribution.
358  fCumulativeDist.resize(inputSize);
359  for (UInt_t icls = 0; icls < numDist; icls++) {
360  for (UInt_t ivar=0; ivar < inputSize; ivar++){
361  Float_t* binnings = new Float_t[nbins[icls][ivar]];
362  //the binning for this particular histogram:
363  for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
364  binnings[k] = vsForBinning[icls][ivar][k];
365  }
366  fCumulativeDist[ivar].resize(numDist);
367  if (0 != fCumulativeDist[ivar][icls] ) {
368  delete fCumulativeDist[ivar][icls];
369  }
370  fCumulativeDist[ivar][icls] = new TH1F(Form("Cumulative_Var%d_cls%d",ivar,icls),
371  Form("Cumulative_Var%d_cls%d",ivar,icls),
372  nbins[icls][ivar] -1, // class icls
373  binnings);
374  fCumulativeDist[ivar][icls]->SetDirectory(0);
375  delete [] binnings;
376  }
377  }
378 
379  // Deallocation
380  for (UInt_t i=0; i<numDist; i++) {
381  delete [] listsForBinning[numDist-i-1];
382  delete [] vsForBinning[numDist-i-1];
383  delete [] nbins[numDist-i-1];
384  }
385  delete [] listsForBinning;
386  delete [] vsForBinning;
387  delete [] nbins;
388 
389  // perform event loop
390  std::vector<Int_t> ic(numDist);
391  for (UInt_t ievt=0; ievt<nevt; ievt++) {
392 
393  const Event* ev= events[ievt];
394  Int_t cls = ev->GetClass();
395  Float_t eventWeight = ev->GetWeight();
396 
397  GetInput( ev, input, mask );
398 
399  Int_t ivar = 0;
400  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
401  Float_t value = (*itInput);
402  fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
403  if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
404 
405  ++ivar;
406  }
407  }
408 
409  // clean up
410  CleanUpCumulativeArrays("PDF");
411 
412  // now sum up in order to get the real cumulative distribution
413  Double_t sum = 0, total=0;
414  fCumulativePDF.resize(inputSize);
415  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
416  // fCumulativePDF.resize(ivar+1);
417  for (UInt_t icls=0; icls<numDist; icls++) {
418  (fCumulativeDist[ivar][icls])->Smooth();
419  sum = 0;
420  total = 0.;
421  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
422  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
423  if (val>0) total += val;
424  }
425  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
426  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
427  if (val>0) sum += val;
428  (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
429  }
430  // create PDf
431  fCumulativePDF[ivar].push_back(new PDF( Form("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
432  }
433  }
434 }
435 
436 ////////////////////////////////////////////////////////////////////////////////
437 
439 {
440  Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
441 }
442 
443 ////////////////////////////////////////////////////////////////////////////////
444 /// clean up of cumulative arrays
445 
447  if (opt == "ALL" || opt == "PDF"){
448  for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
449  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
450  if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
451  }
452  }
453  fCumulativePDF.clear();
454  }
455  if (opt == "ALL" || opt == "Dist"){
456  for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
457  for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
458  if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
459  }
460  }
461  fCumulativeDist.clear();
462  }
463 }
464 ////////////////////////////////////////////////////////////////////////////////
465 /// create XML description of Gauss transformation
466 
468  void* trfxml = gTools().AddChild(parent, "Transform");
469  gTools().AddAttr(trfxml, "Name", "Gauss");
470  gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
471 
473 
474  UInt_t nvar = fGet.size();
475  for (UInt_t ivar=0; ivar<nvar; ivar++) {
476  void* varxml = gTools().AddChild( trfxml, "Variable");
477  // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
478  gTools().AddAttr( varxml, "VarIndex", ivar );
479 
480  if ( fCumulativePDF[ivar][0]==0 ||
481  (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
482  Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
483 
484  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
485  void* pdfxml = gTools().AddChild( varxml, Form("CumulativePDF_cls%d",icls));
486  (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
487  }
488  }
489 }
490 
491 ////////////////////////////////////////////////////////////////////////////////
492 /// Read the transformation matrices from the xml node
493 
495  // clean up first
496  CleanUpCumulativeArrays();
497  TString FlatOrGauss;
498 
499  gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
500 
501  if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
502  else fFlatNotGauss = kFALSE;
503 
504  Bool_t newFormat = kFALSE;
505 
506  void* inpnode = NULL;
507 
508  inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
509  if( inpnode!=NULL )
510  newFormat = kTRUE; // new xml format
511 
512  void* varnode = NULL;
513  if( newFormat ){
514  // ------------- new format --------------------
515  // read input
517 
518  varnode = gTools().GetNextChild(inpnode);
519  }else
520  varnode = gTools().GetChild(trfnode);
521 
522  // Read the cumulative distribution
523 
524  TString varname, histname, classname;
525  UInt_t ivar;
526  while(varnode) {
527  if( gTools().HasAttr(varnode,"Name") )
528  gTools().ReadAttr(varnode, "Name", varname);
529  gTools().ReadAttr(varnode, "VarIndex", ivar);
530 
531  void* clsnode = gTools().GetChild( varnode);
532 
533  while(clsnode) {
534  void* pdfnode = gTools().GetChild( clsnode);
535  PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
536  pdfToRead->ReadXML(pdfnode); // pdfnode
537  // push_back PDF
538  fCumulativePDF.resize( ivar+1 );
539  fCumulativePDF[ivar].push_back(pdfToRead);
540  clsnode = gTools().GetNextChild(clsnode);
541  }
542 
543  varnode = gTools().GetNextChild(varnode);
544  }
545  SetCreated();
546 }
547 
548 ////////////////////////////////////////////////////////////////////////////////
549 /// Read the cumulative distribution
550 
551 void TMVA::VariableGaussTransform::ReadTransformationFromStream( std::istream& istr, const TString& classname)
552 {
553  Bool_t addDirStatus = TH1::AddDirectoryStatus();
554  TH1::AddDirectory(0); // this avoids the binding of the hists in TMVA::PDF to the current ROOT file
555  char buf[512];
556  istr.getline(buf,512);
557 
558  TString strvar, dummy;
559 
560  while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
561  char* p = buf;
562  while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
563  if (*p=='#' || *p=='\0') {
564  istr.getline(buf,512);
565  continue; // if comment or empty line, read the next line
566  }
567  std::stringstream sstr(buf);
568  sstr >> strvar;
569 
570  if (strvar=="CumulativeHistogram") {
571  UInt_t type(0), ivar(0);
572  TString devnullS(""),hname("");
573  Int_t nbins(0);
574 
575  // coverity[tainted_data_argument]
576  sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
577 
578  Float_t *Binnings = new Float_t[nbins+1];
579  Float_t val;
580  istr >> devnullS; // read the line "BinBoundaries" ..
581  for (Int_t ibin=0; ibin<nbins+1; ibin++) {
582  istr >> val;
583  Binnings[ibin]=val;
584  }
585 
586  if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
587  if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
588 
589  TH1F * histToRead = fCumulativeDist[ivar][type];
590  if ( histToRead !=0 ) delete histToRead;
591  // recreate the cumulative histogram to be filled with the values read
592  histToRead = new TH1F( hname, hname, nbins, Binnings );
593  histToRead->SetDirectory(0);
594  fCumulativeDist[ivar][type]=histToRead;
595 
596  istr >> devnullS; // read the line "BinContent" ..
597  for (Int_t ibin=0; ibin<nbins; ibin++) {
598  istr >> val;
599  histToRead->SetBinContent(ibin+1,val);
600  }
601 
602  PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
603  // push_back PDF
604  fCumulativePDF.resize(ivar+1);
605  fCumulativePDF[ivar].resize(type+1);
606  fCumulativePDF[ivar][type] = pdf;
607  delete [] Binnings;
608  }
609 
610  // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
611  if (strvar=="Uniform") { // don't correct this spelling mistake
612  sstr >> fFlatNotGauss;
613  istr.getline(buf,512);
614  break;
615  }
616 
617  istr.getline(buf,512); // reading the next line
618  }
619  TH1::AddDirectory(addDirStatus);
620 
621  UInt_t classIdx=(classname=="signal")?0:1;
622  for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
623  PDF* src = fCumulativePDF[ivar][classIdx];
624  fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
625  }
626 
627  SetTMVAVersion(TMVA_VERSION(3,9,7));
628 
629  SetCreated();
630 }
631 
632 ////////////////////////////////////////////////////////////////////////////////
633 
635 
636  Int_t bin = h->FindBin(x);
637  bin = TMath::Max(bin,1);
638  bin = TMath::Min(bin,h->GetNbinsX());
639 
640  Double_t cumulant;
641  Double_t x0, x1, y0, y1;
642  Double_t total = h->GetNbinsX()*fElementsperbin;
643  Double_t supmin = 0.5/total;
644 
645  x0 = h->GetBinLowEdge(TMath::Max(bin,1));
646  x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
647 
648  y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
649  y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
650 
651  if (bin == 0) {
652  y0 = supmin;
653  y1 = supmin;
654  }
655  if (bin == 1) {
656  y0 = supmin;
657  }
658  if (bin > h->GetNbinsX()) {
659  y0 = 1.-supmin;
660  y1 = 1.-supmin;
661  }
662  if (bin == h->GetNbinsX()) {
663  y1 = 1.-supmin;
664  }
665 
666  if (x0 == x1) {
667  cumulant = y1;
668  } else {
669  cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
670  }
671 
672  if (x <= h->GetBinLowEdge(1)){
673  cumulant = supmin;
674  }
675  if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
676  cumulant = 1-supmin;
677  }
678  return cumulant;
679 }
680 
681 ////////////////////////////////////////////////////////////////////////////////
682 /// prints the transformation
683 
685 {
686  Int_t cls = 0;
687  Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
688  cls++;
689 }
690 
691 ////////////////////////////////////////////////////////////////////////////////
692 /// creates the transformation function
693 ///
694 
695 void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TString& fcncName,
696  Int_t part, UInt_t trCounter, Int_t )
697 {
698  const UInt_t nvar = fGet.size();
699  UInt_t numDist = GetNClasses() + 1;
700  Int_t nBins = -1;
701  for (UInt_t icls=0; icls<numDist; icls++) {
702  for (UInt_t ivar=0; ivar<nvar; ivar++) {
703  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
704  if (nbin > nBins) nBins=nbin;
705  }
706  }
707 
708  // creates the gauss transformation function
709  if (part==1) {
710  fout << std::endl;
711  fout << " int nvar;" << std::endl;
712  fout << std::endl;
713  // declare variables
714  fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
715  fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
716  fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
717  fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
718  fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
719  }
720  if (part==2) {
721  fout << std::endl;
722  fout << "#include \"math.h\"" << std::endl;
723  fout << std::endl;
724  fout << "//_______________________________________________________________________" << std::endl;
725  fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
726  fout << "{" << std::endl;
727  fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
728  fout << " nvar=" << nvar << ";" << std::endl;
729  for (UInt_t icls=0; icls<numDist; icls++) {
730  for (UInt_t ivar=0; ivar<nvar; ivar++) {
731  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
732  fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
733  }
734  }
735 
736  // fill meat here
737  // loop over nvar , cls, loop over nBins
738  // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
739  for (UInt_t icls=0; icls<numDist; icls++) {
740  for (UInt_t ivar=0; ivar<nvar; ivar++) {
741  // Int_t idx = 0;
742  try{
743  // idx = fGet.at(ivar).second;
744  Char_t type = fGet.at(ivar).first;
745  if( type != 'v' ){
746  Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
747  }
748  }catch( std::out_of_range &){
749  Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
750  }
751 
752  // Double_t xmn=Variables()[idx].GetMin();
753  // Double_t xmx=Variables()[idx].GetMax();
754  Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
755  Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
756 
757  fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
758  fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
759  for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
760  fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
761  fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
762 
763  }
764  }
765  }
766  fout << "}" << std::endl;
767  fout << std::endl;
768  fout << "//_______________________________________________________________________" << std::endl;
769  fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
770  fout << "{" << std::endl;
771  fout << " // Gauss/Uniform transformation" << std::endl;
772  fout << " int cls=clsIn;" << std::endl;
773  fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
774  fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
775  fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
776  fout << " }"<< std::endl;
777 
778  fout << " // copy the variables which are going to be transformed "<< std::endl;
779  VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 );
780  fout << " static std::vector<double> dv; "<< std::endl;
781  fout << " dv.resize(nvar); "<< std::endl;
782  fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
783  fout << " "<< std::endl;
784  fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
785  fout << " double cumulant; "<< std::endl;
786  fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
787  fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
788  fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
789  fout << " int ibin=0; "<< std::endl;
790  fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
791  fout << " "<< std::endl;
792  fout << " if (ibin<0) { ibin=0;} "<< std::endl;
793  fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
794  fout << " int nextbin = ibin; "<< std::endl;
795  fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
796  fout << " nextbin++; "<< std::endl;
797  fout << " else "<< std::endl;
798  fout << " nextbin--; "<< std::endl;
799  fout << " "<< std::endl;
800  fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
801  fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
802  fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
803  fout << " "<< std::endl;
804  fout << " "<< std::endl;
805  fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
806  fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
807  fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
808  fout << " else { "<< std::endl;
809  fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
810  fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
811  fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
812  fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
813  fout << " double inverf=0., stp=1. ; "<< std::endl;
814  fout << " while (stp >1.e-10){; "<< std::endl;
815  fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
816  fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
817  fout << " else inverf += stp; "<< std::endl;
818  fout << " } ; "<< std::endl;
819  fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
820  fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
821  fout << " } "<< std::endl;
822  fout << " } "<< std::endl;
823  fout << " // copy the transformed variables back "<< std::endl;
824  fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
825  fout << "} "<< std::endl;
826  }
827 }
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:91
TH1F.h
e
#define e(i)
Definition: RSha256.hxx:103
TMVA::Tools::GetChild
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TVectorD.h
TMVA::VariableTransformBase::MakeFunction
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
Definition: VariableTransformBase.cxx:817
TMath::Max
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
TMVA::VariableTransformBase::AttachXMLTo
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
Definition: VariableTransformBase.cxx:620
DataSetInfo.h
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
Form
char * Form(const char *fmt,...)
TMVA::VariableGaussTransform::InverseTransform
virtual const Event * InverseTransform(const Event *const, Int_t cls) const
apply the inverse Gauss or inverse uniform transformation
Definition: VariableGaussTransform.cxx:191
TMVA::Event::GetClass
UInt_t GetClass() const
Definition: Event.h:86
TMVA::TMVAGaussPair
Definition: VariableGaussTransform.h:54
TMVA::PDF::ReadXML
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:961
sum
static uint64_t sum(uint64_t i)
Definition: Factory.cxx:2345
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:760
TMVA::VariableGaussTransform::Initialize
void Initialize()
Definition: VariableGaussTransform.cxx:84
output
static void output(int code)
Definition: gifencode.c:226
TMVA::Tools::AddChild
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1136
TMVA::VariableGaussTransform
Gaussian Transformation of input variables.
Definition: VariableGaussTransform.h:72
Float_t
float Float_t
Definition: RtypesCore.h:57
TH1::SetBinContent
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8988
x
Double_t x[n]
Definition: legend1.C:17
TMath::Erf
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:184
TMVA::Event::Print
void Print(std::ostream &o) const
print method
Definition: Event.cxx:351
TMVA::DNN::EActivationFunction::kGauss
@ kGauss
TMVA::VariableTransformBase
Linear interpolation class.
Definition: VariableTransformBase.h:54
TString
Basic string class.
Definition: TString.h:136
bool
x1
static const double x1[5]
Definition: RooGaussKronrodIntegrator1D.cxx:346
Version.h
PDF.h
TMVA::VariableGaussTransform::PrepareTransformation
Bool_t PrepareTransformation(const std::vector< Event * > &)
calculate the cumulative distributions
Definition: VariableGaussTransform.cxx:91
TMVA::Tools::AddAttr
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:353
total
static unsigned int total
Definition: TGWin32ProxyDefs.h:40
TMVA::DataSetInfo
Class that contains all the data information.
Definition: DataSetInfo.h:62
MsgLogger.h
TMVA::VariableGaussTransform::OldCumulant
Double_t OldCumulant(Float_t x, TH1 *h) const
Definition: VariableGaussTransform.cxx:634
TMVA::VariableTransformBase::ReadFromXML
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
Definition: VariableTransformBase.cxx:708
TMVA::VariableGaussTransform::AttachXMLTo
virtual void AttachXMLTo(void *parent)
create XML description of Gauss transformation
Definition: VariableGaussTransform.cxx:467
h
#define h(i)
Definition: RSha256.hxx:106
TMVA::VariableGaussTransform::PrintTransformation
virtual void PrintTransformation(std::ostream &o)
prints the transformation
Definition: VariableGaussTransform.cxx:684
TMVA::Event::GetNVariables
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:308
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:92
TMVA::VariableGaussTransform::WriteTransformationToStream
void WriteTransformationToStream(std::ostream &) const
Definition: VariableGaussTransform.cxx:438
TH1::AddDirectory
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1258
TString::TString
TString()
TString default ctor.
Definition: TString.cxx:87
TMVA::Tools::ReadAttr
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
TMVA::VariableTransformBase::SetName
void SetName(const TString &c)
Definition: VariableTransformBase.h:124
TMVA::PDF::GetName
const char * GetName() const
Returns name of object.
Definition: PDF.h:116
TMVA::VariableGaussTransform::ReadFromXML
virtual void ReadFromXML(void *trfnode)
Read the transformation matrices from the xml node.
Definition: VariableGaussTransform.cxx:494
TH1::SetDirectory
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8703
VariableGaussTransform.h
TMVA::VariableGaussTransform::ReadTransformationFromStream
void ReadTransformationFromStream(std::istream &, const TString &)
Read the cumulative distribution.
Definition: VariableGaussTransform.cxx:551
TMVA::Types
Singleton class for Global types used by TMVA.
Definition: Types.h:73
Types.h
TMath::Min
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
unsigned int
TMVA::VariableGaussTransform::Transform
virtual const Event * Transform(const Event *const, Int_t cls) const
apply the Gauss transformation
Definition: VariableGaussTransform.cxx:122
TMVA::VariableGaussTransform::~VariableGaussTransform
virtual ~VariableGaussTransform(void)
destructor
Definition: VariableGaussTransform.cxx:77
TMVA::TMVAGlob::Initialize
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
TMVA::Tools::StringFromDouble
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1245
TMVA::PDF
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
TMVA::PDF::kSpline1
@ kSpline1
Definition: PDF.h:70
TMVA::VariableGaussTransform::GetCumulativeDist
void GetCumulativeDist(const std::vector< Event * > &)
fill the cumulative distributions
Definition: VariableGaussTransform.cxx:251
Double_t
double Double_t
Definition: RtypesCore.h:59
TMVA::Event::GetWeight
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:381
TMVA::Tools::GetNextChild
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
TH1F
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
TMVA::PDF::kSpline0
@ kSpline0
Definition: PDF.h:70
TMVA::Event
Definition: Event.h:51
TMVA_VERSION
#define TMVA_VERSION(a, b, c)
Definition: Version.h:48
TH1
TH1 is the base class of all histogramm classes in ROOT.
Definition: TH1.h:58
TVectorF.h
TMVA::VariableGaussTransform::MakeFunction
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)
creates the transformation function
Definition: VariableGaussTransform.cxx:695
TMath::ErfInverse
Double_t ErfInverse(Double_t x)
returns the inverse error function x must be <-1<x<1
Definition: TMath.cxx:203
Tools.h
type
int type
Definition: TGX11.cxx:121
TH1::AddDirectoryStatus
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition: TH1.cxx:738
TMVA::gTools
Tools & gTools()
Char_t
char Char_t
Definition: RtypesCore.h:33
TMVA::VariableGaussTransform::CleanUpCumulativeArrays
void CleanUpCumulativeArrays(TString opt="ALL")
clean up of cumulative arrays
Definition: VariableGaussTransform.cxx:446
TMVA::VariableGaussTransform::fFlatNotGauss
Bool_t fFlatNotGauss
Definition: VariableGaussTransform.h:98
TMath.h
TMVA::VariableGaussTransform::VariableGaussTransform
VariableGaussTransform(DataSetInfo &dsi, TString strcor="")
constructor can only be applied one after the other when they are created.
Definition: VariableGaussTransform.cxx:62
int