Logo ROOT   6.10/09
Reference Guide
VariableGaussTransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariableGaussTransform *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2005-2011: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * U. of Bonn, Germany *
24  * *
25  * Redistribution and use in source and binary forms, with or without *
26  * modification, are permitted according to the terms listed in LICENSE *
27  * (http://tmva.sourceforge.net/LICENSE) *
28  **********************************************************************************/
29 
30 /*! \class TMVA::VariableGaussTransform
31 \ingroup TMVA
32 Gaussian Transformation of input variables.
33 */
34 
36 
37 #include "TMVA/DataSetInfo.h"
38 #include "TMVA/MsgLogger.h"
39 #include "TMVA/PDF.h"
40 #include "TMVA/Tools.h"
41 #include "TMVA/Types.h"
42 #include "TMVA/Version.h"
43 
44 #include "TCanvas.h"
45 #include "TGraph.h"
46 #include "TH1F.h"
47 #include "TMath.h"
48 #include "TVectorF.h"
49 #include "TVectorD.h"
50 
51 #include <exception>
52 #include <iostream>
53 #include <iomanip>
54 #include <list>
55 #include <limits>
56 #include <stdexcept>
57 
59 
60 ////////////////////////////////////////////////////////////////////////////////
61 /// constructor
62 /// can only be applied one after the other when they are created. But in order to
63 /// determine the Gauss transformation
64 
66 : VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
67  fFlatNotGauss(kFALSE),
68  fPdfMinSmooth(0),
69  fPdfMaxSmooth(0),
70  fElementsperbin(0)
71 {
72  if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
73  SetName("Uniform");
74  }
75 }
76 
77 ////////////////////////////////////////////////////////////////////////////////
78 /// destructor
79 
81 {
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 
88 {
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// calculate the cumulative distributions
93 
95 {
96  Initialize();
97 
98  if (!IsEnabled() || IsCreated()) return kTRUE;
99 
100  Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
101 
102  UInt_t inputSize = fGet.size();
103  SetNVariables(inputSize);
104 
105  if (inputSize > 200) {
106  Log() << kWARNING << "----------------------------------------------------------------------------"
107  << Endl;
108  Log() << kWARNING
109  << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
110  Log() << kWARNING << "----------------------------------------------------------------------------"
111  << Endl;
112  // return kFALSE;
113  }
114 
115  GetCumulativeDist( events );
116 
117  SetCreated( kTRUE );
118 
119  return kTRUE;
120 }
121 
122 ////////////////////////////////////////////////////////////////////////////////
123 /// apply the Gauss transformation
124 
126 {
127  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
128  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
129  //EVT if (cls <0 || cls > GetNClasses() ) {
130  //EVT cls = GetNClasses();
131  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
132  //EVT}
133  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
134  //EVT workaround end
135 
136  // get the variable vector of the current event
137  UInt_t inputSize = fGet.size();
138 
139  std::vector<Float_t> input(0);
140  std::vector<Float_t> output(0);
141 
142  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
143  GetInput( ev, input, mask );
144 
145  std::vector<Char_t>::iterator itMask = mask.begin();
146 
147  // TVectorD vec( inputSize );
148  // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
149  Double_t cumulant;
150  //transformation
151  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
152 
153  if ( (*itMask) ){
154  ++itMask;
155  continue;
156  }
157 
158  if (0 != fCumulativePDF[ivar][cls]) {
159  // first make it flat
160  if(fTMVAVersion>TMVA_VERSION(3,9,7))
161  cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
162  else
163  cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
164  cumulant = TMath::Min(cumulant,1.-10e-10);
165  cumulant = TMath::Max(cumulant,0.+10e-10);
166 
167  if (fFlatNotGauss)
168  output.push_back( cumulant );
169  else {
170  // sanity correction for out-of-range values
171  Double_t maxErfInvArgRange = 0.99999999;
172  Double_t arg = 2.0*cumulant - 1.0;
173  arg = TMath::Min(+maxErfInvArgRange,arg);
174  arg = TMath::Max(-maxErfInvArgRange,arg);
175 
176  output.push_back( 1.414213562*TMath::ErfInverse(arg) );
177  }
178  }
179  }
180 
182  if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
183  fTransformedEvent = new Event();
184  }
185 
186  SetOutput( fTransformedEvent, output, mask, ev );
187 
188  return fTransformedEvent;
189 }
190 
191 ////////////////////////////////////////////////////////////////////////////////
192 /// apply the inverse Gauss or inverse uniform transformation
193 
195 {
196  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
197  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
198  //EVT if (cls <0 || cls > GetNClasses() ) {
199  //EVT cls = GetNClasses();
200  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
201  //EVT}
202  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
203  //EVT workaround end
204 
205  // get the variable vector of the current event
206  UInt_t inputSize = fGet.size();
207 
208  std::vector<Float_t> input(0);
209  std::vector<Float_t> output(0);
210 
211  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
212  GetInput( ev, input, mask, kTRUE );
213 
214  std::vector<Char_t>::iterator itMask = mask.begin();
215 
216  // TVectorD vec( inputSize );
217  // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
218  Double_t invCumulant;
219  //transformation
220  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
221 
222  if ( (*itMask) ){
223  ++itMask;
224  continue;
225  }
226 
227  if (0 != fCumulativePDF[ivar][cls]) {
228  invCumulant = input.at(ivar);
229 
230  // first de-gauss ist if gaussianized
231  if (!fFlatNotGauss)
232  invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
233 
234  // then de-uniform the values
235  if(fTMVAVersion>TMVA_VERSION(4,0,0))
236  invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
237  else
238  Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
239 
240  output.push_back(invCumulant);
241  }
242  }
243 
245 
246  SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
247 
248  return fBackTransformedEvent;
249 }
250 
251 ////////////////////////////////////////////////////////////////////////////////
252 /// fill the cumulative distributions
253 
254 void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
255 {
256  const UInt_t inputSize = fGet.size();
257  // const UInt_t nCls = GetNClasses();
258 
259  // const UInt_t nvar = GetNVariables();
260  UInt_t nevt = events.size();
261 
262  const UInt_t nClasses = GetNClasses();
263  UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes separately + one where all classes are treated (added) together
264 
265  if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
266 
267  UInt_t **nbins = new UInt_t*[numDist];
268 
269  std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
270  std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
271  for (UInt_t i=0; i < numDist; i++) {
272  listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
273  vsForBinning[i] = new std::vector<Float_t> [inputSize];
274  nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
275  }
276 
277  std::vector<Float_t> input;
278  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
279 
280  // perform event loop
281  Float_t *sumOfWeights = new Float_t[numDist];
282  Float_t *minWeight = new Float_t[numDist];
283  Float_t *maxWeight = new Float_t[numDist];
284  for (UInt_t i=0; i<numDist; i++) {
285  sumOfWeights[i]=0;
286  minWeight[i]=10E10; // TODO: change this to std::max ?
287  maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
288  }
289  for (UInt_t ievt=0; ievt < nevt; ievt++) {
290  const Event* ev= events[ievt];
291  Int_t cls = ev->GetClass();
292  Float_t eventWeight = ev->GetWeight();
293  sumOfWeights[cls] += eventWeight;
294  if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
295  if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
296  if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
297 
298  Bool_t hasMaskedEntries = GetInput( ev, input, mask );
299  if( hasMaskedEntries ){
300  Log() << kWARNING << "Incomplete event" << Endl;
301  std::ostringstream oss;
302  ev->Print(oss);
303  Log() << oss.str();
304  Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
305  }
306 
307 
308  Int_t ivar = 0;
309  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
310  Float_t value = (*itInput);
311  listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
312  if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
313  ++ivar;
314  }
315  }
316  if (numDist > 1) {
317  for (UInt_t icl=0; icl<numDist-1; icl++){
318  minWeight[numDist-1] = TMath::Min(minWeight[icl],minWeight[numDist-1]);
319  maxWeight[numDist-1] = TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
320  }
321  }
322 
323  // Sorting the lists, getting nbins ...
324  const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
325  const UInt_t nbinsmax=2000; // maximum number of bins
326 
327  for (UInt_t icl=0; icl< numDist; icl++){
328  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
329  listsForBinning[icl][ivar].sort();
330  std::list< TMVA::TMVAGaussPair >::iterator it;
331  Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
332  sumPerBin=TMath::Max(minWeight[icl]*nevmin,sumPerBin);
333  Float_t sum=0;
334  Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
335  Float_t lastev_value=ev_value;
336  const Float_t eps = 1.e-4;
337  vsForBinning[icl][ivar].push_back(ev_value-eps);
338  vsForBinning[icl][ivar].push_back(ev_value);
339 
340  for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); it++){
341  sum+= it->GetWeight();
342  if (sum >= sumPerBin) {
343  ev_value=it->GetValue();
344  if (ev_value>lastev_value) { // protection against bin width of 0
345  vsForBinning[icl][ivar].push_back(ev_value);
346  sum = 0.;
347  lastev_value=ev_value;
348  }
349  }
350  }
351  if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
352  nbins[icl][ivar] = vsForBinning[icl][ivar].size();
353  }
354  }
355 
356  delete[] sumOfWeights;
357  delete[] minWeight;
358  delete[] maxWeight;
359 
360  // create histogram for the cumulative distribution.
361  fCumulativeDist.resize(inputSize);
362  for (UInt_t icls = 0; icls < numDist; icls++) {
363  for (UInt_t ivar=0; ivar < inputSize; ivar++){
364  Float_t* binnings = new Float_t[nbins[icls][ivar]];
365  //the binning for this particular histogram:
366  for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
367  binnings[k] = vsForBinning[icls][ivar][k];
368  }
369  fCumulativeDist[ivar].resize(numDist);
370  if (0 != fCumulativeDist[ivar][icls] ) {
371  delete fCumulativeDist[ivar][icls];
372  }
373  fCumulativeDist[ivar][icls] = new TH1F(Form("Cumulative_Var%d_cls%d",ivar,icls),
374  Form("Cumulative_Var%d_cls%d",ivar,icls),
375  nbins[icls][ivar] -1, // class icls
376  binnings);
377  fCumulativeDist[ivar][icls]->SetDirectory(0);
378  delete [] binnings;
379  }
380  }
381 
382  // Deallocation
383  for (UInt_t i=0; i<numDist; i++) {
384  delete [] listsForBinning[numDist-i-1];
385  delete [] vsForBinning[numDist-i-1];
386  delete [] nbins[numDist-i-1];
387  }
388  delete [] listsForBinning;
389  delete [] vsForBinning;
390  delete [] nbins;
391 
392  // perform event loop
393  std::vector<Int_t> ic(numDist);
394  for (UInt_t ievt=0; ievt<nevt; ievt++) {
395 
396  const Event* ev= events[ievt];
397  Int_t cls = ev->GetClass();
398  Float_t eventWeight = ev->GetWeight();
399 
400  GetInput( ev, input, mask );
401 
402  Int_t ivar = 0;
403  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
404  Float_t value = (*itInput);
405  fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
406  if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
407 
408  ++ivar;
409  }
410  }
411 
412  // clean up
414 
415  // now sum up in order to get the real cumulative distribution
416  Double_t sum = 0, total=0;
417  fCumulativePDF.resize(inputSize);
418  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
419  // fCumulativePDF.resize(ivar+1);
420  for (UInt_t icls=0; icls<numDist; icls++) {
421  (fCumulativeDist[ivar][icls])->Smooth();
422  sum = 0;
423  total = 0.;
424  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
425  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
426  if (val>0) total += val;
427  }
428  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
429  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
430  if (val>0) sum += val;
431  (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
432  }
433  // create PDf
434  fCumulativePDF[ivar].push_back(new PDF( Form("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
435  }
436  }
437 }
438 
439 ////////////////////////////////////////////////////////////////////////////////
440 
442 {
443  Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
444 }
445 
446 ////////////////////////////////////////////////////////////////////////////////
447 /// clean up of cumulative arrays
448 
450  if (opt == "ALL" || opt == "PDF"){
451  for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
452  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
453  if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
454  }
455  }
456  fCumulativePDF.clear();
457  }
458  if (opt == "ALL" || opt == "Dist"){
459  for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
460  for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
461  if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
462  }
463  }
464  fCumulativeDist.clear();
465  }
466 }
467 ////////////////////////////////////////////////////////////////////////////////
468 /// create XML description of Gauss transformation
469 
471  void* trfxml = gTools().AddChild(parent, "Transform");
472  gTools().AddAttr(trfxml, "Name", "Gauss");
473  gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
474 
476 
477  UInt_t nvar = fGet.size();
478  for (UInt_t ivar=0; ivar<nvar; ivar++) {
479  void* varxml = gTools().AddChild( trfxml, "Variable");
480  // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
481  gTools().AddAttr( varxml, "VarIndex", ivar );
482 
483  if ( fCumulativePDF[ivar][0]==0 ||
484  (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
485  Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
486 
487  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
488  void* pdfxml = gTools().AddChild( varxml, Form("CumulativePDF_cls%d",icls));
489  (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
490  }
491  }
492 }
493 
494 ////////////////////////////////////////////////////////////////////////////////
495 /// Read the transformation matrices from the xml node
496 
498  // clean up first
500  TString FlatOrGauss;
501 
502  gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
503 
504  if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
505  else fFlatNotGauss = kFALSE;
506 
507  Bool_t newFormat = kFALSE;
508 
509  void* inpnode = NULL;
510 
511  inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
512  if( inpnode!=NULL )
513  newFormat = kTRUE; // new xml format
514 
515  void* varnode = NULL;
516  if( newFormat ){
517  // ------------- new format --------------------
518  // read input
520 
521  varnode = gTools().GetNextChild(inpnode);
522  }else
523  varnode = gTools().GetChild(trfnode);
524 
525  // Read the cumulative distribution
526 
527  TString varname, histname, classname;
528  UInt_t ivar;
529  while(varnode) {
530  if( gTools().HasAttr(varnode,"Name") )
531  gTools().ReadAttr(varnode, "Name", varname);
532  gTools().ReadAttr(varnode, "VarIndex", ivar);
533 
534  void* clsnode = gTools().GetChild( varnode);
535 
536  while(clsnode) {
537  void* pdfnode = gTools().GetChild( clsnode);
538  PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
539  pdfToRead->ReadXML(pdfnode); // pdfnode
540  // push_back PDF
541  fCumulativePDF.resize( ivar+1 );
542  fCumulativePDF[ivar].push_back(pdfToRead);
543  clsnode = gTools().GetNextChild(clsnode);
544  }
545 
546  varnode = gTools().GetNextChild(varnode);
547  }
548  SetCreated();
549 }
550 
551 ////////////////////////////////////////////////////////////////////////////////
552 /// Read the cumulative distribution
553 
554 void TMVA::VariableGaussTransform::ReadTransformationFromStream( std::istream& istr, const TString& classname)
555 {
556  Bool_t addDirStatus = TH1::AddDirectoryStatus();
557  TH1::AddDirectory(0); // this avoids the binding of the hists in TMVA::PDF to the current ROOT file
558  char buf[512];
559  istr.getline(buf,512);
560 
561  TString strvar, dummy;
562 
563  while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
564  char* p = buf;
565  while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
566  if (*p=='#' || *p=='\0') {
567  istr.getline(buf,512);
568  continue; // if comment or empty line, read the next line
569  }
570  std::stringstream sstr(buf);
571  sstr >> strvar;
572 
573  if (strvar=="CumulativeHistogram") {
574  UInt_t type(0), ivar(0);
575  TString devnullS(""),hname("");
576  Int_t nbins(0);
577 
578  // coverity[tainted_data_argument]
579  sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
580 
581  Float_t *Binnings = new Float_t[nbins+1];
582  Float_t val;
583  istr >> devnullS; // read the line "BinBoundaries" ..
584  for (Int_t ibin=0; ibin<nbins+1; ibin++) {
585  istr >> val;
586  Binnings[ibin]=val;
587  }
588 
589  if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
590  if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
591 
592  TH1F * histToRead = fCumulativeDist[ivar][type];
593  if ( histToRead !=0 ) delete histToRead;
594  // recreate the cumulative histogram to be filled with the values read
595  histToRead = new TH1F( hname, hname, nbins, Binnings );
596  histToRead->SetDirectory(0);
597  fCumulativeDist[ivar][type]=histToRead;
598 
599  istr >> devnullS; // read the line "BinContent" ..
600  for (Int_t ibin=0; ibin<nbins; ibin++) {
601  istr >> val;
602  histToRead->SetBinContent(ibin+1,val);
603  }
604 
605  PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
606  // push_back PDF
607  fCumulativePDF.resize(ivar+1);
608  fCumulativePDF[ivar].resize(type+1);
609  fCumulativePDF[ivar][type] = pdf;
610  delete [] Binnings;
611  }
612 
613  // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
614  if (strvar=="Uniform") { // don't correct this spelling mistake
615  sstr >> fFlatNotGauss;
616  istr.getline(buf,512);
617  break;
618  }
619 
620  istr.getline(buf,512); // reading the next line
621  }
622  TH1::AddDirectory(addDirStatus);
623 
624  UInt_t classIdx=(classname=="signal")?0:1;
625  for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
626  PDF* src = fCumulativePDF[ivar][classIdx];
627  fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
628  }
629 
631 
632  SetCreated();
633 }
634 
635 ////////////////////////////////////////////////////////////////////////////////
636 
638 
639  Int_t bin = h->FindBin(x);
640  bin = TMath::Max(bin,1);
641  bin = TMath::Min(bin,h->GetNbinsX());
642 
643  Double_t cumulant;
644  Double_t x0, x1, y0, y1;
646  Double_t supmin = 0.5/total;
647 
648  x0 = h->GetBinLowEdge(TMath::Max(bin,1));
649  x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
650 
651  y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
652  y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
653 
654  if (bin == 0) {
655  y0 = supmin;
656  y1 = supmin;
657  }
658  if (bin == 1) {
659  y0 = supmin;
660  }
661  if (bin > h->GetNbinsX()) {
662  y0 = 1.-supmin;
663  y1 = 1.-supmin;
664  }
665  if (bin == h->GetNbinsX()) {
666  y1 = 1.-supmin;
667  }
668 
669  if (x0 == x1) {
670  cumulant = y1;
671  } else {
672  cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
673  }
674 
675  if (x <= h->GetBinLowEdge(1)){
676  cumulant = supmin;
677  }
678  if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
679  cumulant = 1-supmin;
680  }
681  return cumulant;
682 }
683 
684 ////////////////////////////////////////////////////////////////////////////////
685 /// prints the transformation
686 
688 {
689  Int_t cls = 0;
690  Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
691  cls++;
692 }
693 
694 ////////////////////////////////////////////////////////////////////////////////
695 /// creates the transformation function
696 ///
697 
698 void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TString& fcncName,
699  Int_t part, UInt_t trCounter, Int_t )
700 {
701  const UInt_t nvar = fGet.size();
702  UInt_t numDist = GetNClasses() + 1;
703  Int_t nBins = -1;
704  for (UInt_t icls=0; icls<numDist; icls++) {
705  for (UInt_t ivar=0; ivar<nvar; ivar++) {
706  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
707  if (nbin > nBins) nBins=nbin;
708  }
709  }
710 
711  // creates the gauss transformation function
712  if (part==1) {
713  fout << std::endl;
714  fout << " int nvar;" << std::endl;
715  fout << std::endl;
716  // declare variables
717  fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
718  fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
719  fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
720  fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
721  fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
722  }
723  if (part==2) {
724  fout << std::endl;
725  fout << "#include \"math.h\"" << std::endl;
726  fout << std::endl;
727  fout << "//_______________________________________________________________________" << std::endl;
728  fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
729  fout << "{" << std::endl;
730  fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
731  fout << " nvar=" << nvar << ";" << std::endl;
732  for (UInt_t icls=0; icls<numDist; icls++) {
733  for (UInt_t ivar=0; ivar<nvar; ivar++) {
734  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
735  fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
736  }
737  }
738 
739  // fill meat here
740  // loop over nvar , cls, loop over nBins
741  // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
742  for (UInt_t icls=0; icls<numDist; icls++) {
743  for (UInt_t ivar=0; ivar<nvar; ivar++) {
744  // Int_t idx = 0;
745  try{
746  // idx = fGet.at(ivar).second;
747  Char_t type = fGet.at(ivar).first;
748  if( type != 'v' ){
749  Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
750  }
751  }catch( std::out_of_range except ){
752  Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
753  }
754 
755  // Double_t xmn=Variables()[idx].GetMin();
756  // Double_t xmx=Variables()[idx].GetMax();
757  Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
758  Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
759 
760  fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
761  fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
762  for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
763  fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
764  fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
765 
766  }
767  }
768  }
769  fout << "}" << std::endl;
770  fout << std::endl;
771  fout << "//_______________________________________________________________________" << std::endl;
772  fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
773  fout << "{" << std::endl;
774  fout << " // Gauss/Uniform transformation" << std::endl;
775  fout << " int cls=clsIn;" << std::endl;
776  fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
777  fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
778  fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
779  fout << " }"<< std::endl;
780 
781  fout << " // copy the variables which are going to be transformed "<< std::endl;
782  VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 );
783  fout << " static std::vector<double> dv; "<< std::endl;
784  fout << " dv.resize(nvar); "<< std::endl;
785  fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
786  fout << " "<< std::endl;
787  fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
788  fout << " double cumulant; "<< std::endl;
789  fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
790  fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
791  fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
792  fout << " int ibin=0; "<< std::endl;
793  fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
794  fout << " "<< std::endl;
795  fout << " if (ibin<0) { ibin=0;} "<< std::endl;
796  fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
797  fout << " int nextbin = ibin; "<< std::endl;
798  fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
799  fout << " nextbin++; "<< std::endl;
800  fout << " else "<< std::endl;
801  fout << " nextbin--; "<< std::endl;
802  fout << " "<< std::endl;
803  fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
804  fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
805  fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
806  fout << " "<< std::endl;
807  fout << " "<< std::endl;
808  fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
809  fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
810  fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
811  fout << " else { "<< std::endl;
812  fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
813  fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
814  fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
815  fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
816  fout << " double inverf=0., stp=1. ; "<< std::endl;
817  fout << " while (stp >1.e-10){; "<< std::endl;
818  fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
819  fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
820  fout << " else inverf += stp; "<< std::endl;
821  fout << " } ; "<< std::endl;
822  fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
823  fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
824  fout << " } "<< std::endl;
825  fout << " } "<< std::endl;
826  fout << " // copy the transformed variables back "<< std::endl;
827  fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
828  fout << "} "<< std::endl;
829  }
830 }
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition: TH1.cxx:3441
void SetTMVAVersion(TMVAVersion_t v)
Double_t ErfInverse(Double_t x)
returns the inverse error function x must be <-1<x<1
Definition: TMath.cxx:206
static long int sum(long int i)
Definition: Factory.cxx:2162
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:964
virtual ~VariableGaussTransform(void)
destructor
Singleton class for Global types used by TMVA.
Definition: Types.h:73
virtual const Event * InverseTransform(const Event *const, Int_t cls) const
apply the inverse Gauss or inverse uniform transformation
float Float_t
Definition: RtypesCore.h:53
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8053
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:311
TH1 * h
Definition: legend2.C:5
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4639
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
Bool_t PrepareTransformation(const std::vector< Event *> &)
calculate the cumulative distributions
Basic string class.
Definition: TString.h:129
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition: TH1.cxx:700
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:551
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
int nbins[3]
#define NULL
Definition: RtypesCore.h:88
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8264
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:308
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1135
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1218
virtual Bool_t GetInput(const Event *event, std::vector< Float_t > &input, std::vector< Char_t > &mask, Bool_t backTransform=kFALSE) const
select the values from the event
Double_t x[n]
Definition: legend1.C:17
virtual void SetOutput(Event *event, std::vector< Float_t > &output, std::vector< Char_t > &mask, const Event *oldEvent=0, Bool_t backTransform=kFALSE) const
select the values from the event
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
UInt_t GetClass() const
Definition: Event.h:81
Class that contains all the data information.
Definition: DataSetInfo.h:60
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
const char * GetName() const
Returns name of object.
Definition: PDF.h:116
void ReadTransformationFromStream(std::istream &, const TString &)
Read the cumulative distribution.
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
Linear interpolation class.
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1244
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:187
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8325
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void Print(std::ostream &o) const
print method
Definition: Event.cxx:352
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:290
Tools & gTools()
Gaussian Transformation of input variables.
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:309
const Bool_t kFALSE
Definition: RtypesCore.h:92
static unsigned int total
#define TMVA_VERSION(a, b, c)
Definition: Version.h:48
static const double x1[5]
#define ClassImp(name)
Definition: Rtypes.h:336
double f(double x)
double Double_t
Definition: RtypesCore.h:55
int type
Definition: TGX11.cxx:120
Double_t OldCumulant(Float_t x, TH1 *h) const
static RooMathCoreReg dummy
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1173
static std::shared_ptr< std::function< double(double)> > Gauss
Definition: NeuralNet.icc:76
The TH1 histogram class.
Definition: TH1.h:56
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void WriteTransformationToStream(std::ostream &) const
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)
creates the transformation function
char Char_t
Definition: RtypesCore.h:29
Abstract ClassifierFactory template that handles arbitrary types.
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
TString()
TString default ctor.
Definition: TString.cxx:88
std::vector< std::vector< PDF * > > fCumulativePDF
The Cumulative distributions.
virtual void ReadFromXML(void *trfnode)
Read the transformation matrices from the xml node.
void CleanUpCumulativeArrays(TString opt="ALL")
clean up of cumulative arrays
virtual void AttachXMLTo(void *parent)
create XML description of Gauss transformation
void GetCumulativeDist(const std::vector< Event *> &)
fill the cumulative distributions
virtual Int_t GetNbinsX() const
Definition: TH1.h:277
const Bool_t kTRUE
Definition: RtypesCore.h:91
const int except
virtual const Event * Transform(const Event *const, Int_t cls) const
apply the Gauss transformation
virtual void PrintTransformation(std::ostream &o)
prints the transformation
gr SetName("gr")
std::vector< std::vector< TH1F *> > fCumulativeDist