Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
PDF.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Asen Christov, Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Jan Therhaag, Eckhard von Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : PDF *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
17 * Eckhard von Toerne <evt@physik.uni-bonn.de> - U of Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
20 * *
21 * Copyright (c) 2005-2011: *
22 * CERN, Switzerland *
23 * U. of Victoria, Canada *
24 * MPI-K Heidelberg, Germany *
25 * Freiburg U., Germany *
26 * U. of Bonn, Germany *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (see tmva/doc/LICENSE) *
31 **********************************************************************************/
32
33/*! \class TMVA::PDF
34\ingroup TMVA
35PDF wrapper for histograms; uses user-defined spline interpolation.
36*/
37
38#include "TMVA/PDF.h"
39
40#include "TMVA/Configurable.h"
41#include "TMVA/KDEKernel.h"
42#include "TMVA/MsgLogger.h"
43#include "TMVA/Types.h"
44#include "TMVA/Tools.h"
45#include "TMVA/TSpline1.h"
46#include "TMVA/TSpline2.h"
47#include "TMVA/Version.h"
48
49#include "TF1.h"
50#include "TH1F.h"
51#include "TMath.h"
52#include "TVectorD.h"
53#include "TGraph.h"
54
55#include <cstdlib>
56#include <iostream>
57#include <iomanip>
58
59// static configuration settings
62const Double_t TMVA::PDF::fgEpsilon = 1.0e-12;
63
65
66////////////////////////////////////////////////////////////////////////////////
67/// default constructor needed for ROOT I/O
68
70: Configurable (""),
71 fUseHistogram ( kFALSE ),
72 fPDFName ( name ),
73 fNsmooth ( 0 ),
74 fMinNsmooth (-1 ),
75 fMaxNsmooth (-1 ),
76 fNSmoothHist ( 0 ),
77 fInterpolMethod( PDF::kSpline2 ),
78 fSpline ( 0 ),
79 fPDFHist ( 0 ),
80 fHist ( 0 ),
81 fHistOriginal ( 0 ),
82 fGraph ( 0 ),
83 fIGetVal ( 0 ),
84 fHistAvgEvtPerBin ( 0 ),
85 fHistDefinedNBins ( 0 ),
86 fKDEtypeString ( 0 ),
87 fKDEiterString ( 0 ),
88 fBorderMethodString( 0 ),
89 fInterpolateString ( 0 ),
90 fKDEtype ( KDEKernel::kNone ),
91 fKDEiter ( KDEKernel::kNonadaptiveKDE ),
92 fKDEborder ( KDEKernel::kNoTreatment ),
93 fFineFactor ( 0. ),
94 fReadingVersion( 0 ),
95 fCheckHist ( kFALSE ),
96 fNormalize ( norm ),
97 fSuffix ( "" ),
98 fLogger ( 0 )
99{
100 fLogger = new MsgLogger(this);
101 GetThisPdfThreadLocal() = this;
102}
103
104////////////////////////////////////////////////////////////////////////////////
105/// constructor of spline based PDF:
106
108 const TH1 *hist,
110 Int_t minnsmooth,
111 Int_t maxnsmooth,
112 Bool_t checkHist,
113 Bool_t norm) :
114 Configurable (""),
115 fUseHistogram ( kFALSE ),
116 fPDFName ( name ),
117 fMinNsmooth ( minnsmooth ),
118 fMaxNsmooth ( maxnsmooth ),
119 fNSmoothHist ( 0 ),
120 fInterpolMethod( method ),
121 fSpline ( 0 ),
122 fPDFHist ( 0 ),
123 fHist ( 0 ),
124 fHistOriginal ( 0 ),
125 fGraph ( 0 ),
126 fIGetVal ( 0 ),
127 fHistAvgEvtPerBin ( 0 ),
128 fHistDefinedNBins ( 0 ),
129 fKDEtypeString ( 0 ),
130 fKDEiterString ( 0 ),
131 fBorderMethodString( 0 ),
132 fInterpolateString ( 0 ),
133 fKDEtype ( KDEKernel::kNone ),
134 fKDEiter ( KDEKernel::kNonadaptiveKDE ),
135 fKDEborder ( KDEKernel::kNoTreatment ),
136 fFineFactor ( 0. ),
137 fReadingVersion( 0 ),
138 fCheckHist ( checkHist ),
139 fNormalize ( norm ),
140 fSuffix ( "" ),
141 fLogger ( 0 )
142{
143 fLogger = new MsgLogger(this);
144 BuildPDF( hist );
145}
146
147////////////////////////////////////////////////////////////////////////////////
148/// constructor of kernel based PDF:
149
151 const TH1* hist,
155 Float_t FineFactor,
156 Bool_t norm) :
157 Configurable (""),
158 fUseHistogram ( kFALSE ),
159 fPDFName ( name ),
160 fNsmooth ( 0 ),
161 fMinNsmooth (-1 ),
162 fMaxNsmooth (-1 ),
163 fNSmoothHist ( 0 ),
164 fInterpolMethod( PDF::kKDE ),
165 fSpline ( 0 ),
166 fPDFHist ( 0 ),
167 fHist ( 0 ),
168 fHistOriginal ( 0 ),
169 fGraph ( 0 ),
170 fIGetVal ( 0 ),
171 fHistAvgEvtPerBin ( 0 ),
172 fHistDefinedNBins ( 0 ),
173 fKDEtypeString ( 0 ),
174 fKDEiterString ( 0 ),
175 fBorderMethodString( 0 ),
176 fInterpolateString ( 0 ),
177 fKDEtype ( ktype ),
178 fKDEiter ( kiter ),
179 fKDEborder ( kborder ),
180 fFineFactor ( FineFactor),
181 fReadingVersion( 0 ),
182 fCheckHist ( kFALSE ),
183 fNormalize ( norm ),
184 fSuffix ( "" ),
185 fLogger ( 0 )
186{
187 fLogger = new MsgLogger(this);
188 BuildPDF( hist );
189}
190
191////////////////////////////////////////////////////////////////////////////////
192
194 const TString& options,
195 const TString& suffix,
196 PDF* defaultPDF,
197 Bool_t norm) :
198 Configurable (options),
199 fUseHistogram ( kFALSE ),
200 fPDFName ( name ),
201 fNsmooth ( 0 ),
202 fMinNsmooth ( -1 ),
203 fMaxNsmooth ( -1 ),
204 fNSmoothHist ( 0 ),
205 fInterpolMethod( PDF::kSpline0 ),
206 fSpline ( 0 ),
207 fPDFHist ( 0 ),
208 fHist ( 0 ),
209 fHistOriginal ( 0 ),
210 fGraph ( 0 ),
211 fIGetVal ( 0 ),
212 fHistAvgEvtPerBin ( 50 ),
213 fHistDefinedNBins ( 0 ),
214 fKDEtypeString ( "Gauss" ),
215 fKDEiterString ( "Nonadaptive" ),
216 fBorderMethodString( "None" ),
217 fInterpolateString ( "Spline2" ),
218 fKDEtype ( KDEKernel::kNone ),
219 fKDEiter ( KDEKernel::kNonadaptiveKDE ),
220 fKDEborder ( KDEKernel::kNoTreatment ),
221 fFineFactor ( 1. ),
222 fReadingVersion( 0 ),
223 fCheckHist ( kFALSE ),
224 fNormalize ( norm ),
225 fSuffix ( suffix ),
226 fLogger ( 0 )
227{
228 fLogger = new MsgLogger(this);
229 if (defaultPDF != 0) {
230 fNsmooth = defaultPDF->fNsmooth;
231 fMinNsmooth = defaultPDF->fMinNsmooth;
232 fMaxNsmooth = defaultPDF->fMaxNsmooth;
236 fKDEtypeString = defaultPDF->fKDEtypeString;
237 fKDEiterString = defaultPDF->fKDEiterString;
238 fFineFactor = defaultPDF->fFineFactor;
240 fCheckHist = defaultPDF->fCheckHist;
242 }
243}
244
245//___________________fNSmoothHist____________________________________________________
247{
248 // destructor
249 if (fSpline != NULL) delete fSpline;
250 if (fHist != NULL) delete fHist;
251 if (fPDFHist != NULL) delete fPDFHist;
252 if (fHistOriginal != NULL) delete fHistOriginal;
253 if (fIGetVal != NULL) delete fIGetVal;
254 if (fGraph != NULL) delete fGraph;
255 delete fLogger;
256}
257
258////////////////////////////////////////////////////////////////////////////////
259
260void TMVA::PDF::BuildPDF( const TH1* hist )
261{
262 GetThisPdfThreadLocal() = this;
263 // sanity check
264 if (hist == NULL) Log() << kFATAL << "Called without valid histogram pointer!" << Endl;
265
266 // histogram should be non empty
267 if (hist->GetEntries() <= 0)
268 Log() << kFATAL << "Number of entries <= 0 (" << hist->GetEntries() << " in histogram: " << hist->GetTitle() << ")" << Endl;
269
270 if (fInterpolMethod == PDF::kKDE) {
271 Log()<< kDEBUG << "Create "
272 << ((fKDEiter == KDEKernel::kNonadaptiveKDE) ? "nonadaptive " :
273 (fKDEiter == KDEKernel::kAdaptiveKDE) ? "adaptive " : "??? ")
274 << ((fKDEtype == KDEKernel::kGauss) ? "Gauss " : "??? ")
275 << "type KDE kernel for histogram: \"" << hist->GetName() << "\""
276 << Endl;
277 }
278 else {
279 // another sanity check (nsmooth<0 indicated build with KDE)
280 if (fMinNsmooth<0)
281 Log() << kFATAL << "PDF construction called with minnsmooth<0" << Endl;
282 else if (fMaxNsmooth<=0)
283 fMaxNsmooth = fMinNsmooth;
284 else if (fMaxNsmooth<fMinNsmooth)
285 Log() << kFATAL << "PDF construction called with maxnsmooth<minnsmooth" << Endl;
286 }
287
288 fHistOriginal = (TH1F*)hist->Clone( TString(hist->GetName()) + "_original" );
289 fHist = (TH1F*)hist->Clone( TString(hist->GetName()) + "_smoothed" );
290 fHistOriginal->SetTitle( fHistOriginal->GetName() ); // reset to new title as well
291 fHist ->SetTitle( fHist->GetName() );
292
293 // do not store in current target file
294 fHistOriginal->SetDirectory(nullptr);
295 fHist ->SetDirectory(nullptr);
296 fUseHistogram = kFALSE;
297
298 if (fInterpolMethod == PDF::kKDE) BuildKDEPDF();
299 else BuildSplinePDF();
300}
301
302////////////////////////////////////////////////////////////////////////////////
303
305{
306 Int_t ResolutionFactor = (fInterpolMethod == PDF::kKDE) ? 5 : 1;
307 if (evtNum == 0 && fHistDefinedNBins == 0)
308 Log() << kFATAL << "No number of bins set for PDF" << Endl;
309 else if (fHistDefinedNBins > 0)
310 return fHistDefinedNBins * ResolutionFactor;
311 else if ( evtNum > 0 && fHistAvgEvtPerBin > 0 )
312 return evtNum / fHistAvgEvtPerBin * ResolutionFactor;
313 else
314 Log() << kFATAL << "No number of bins or average event per bin set for PDF" << fHistAvgEvtPerBin << Endl;
315 return 0;
316}
317
318////////////////////////////////////////////////////////////////////////////////
319/// build the PDF from the original histograms
320
322{
323 // (not useful for discrete distributions, or if no splines are requested)
324 if (fInterpolMethod != PDF::kSpline0 && fCheckHist) CheckHist();
325 // use ROOT TH1 smooth method
326 fNSmoothHist = 0;
327 if (fMaxNsmooth > 0 && fMinNsmooth >=0 ) SmoothHistogram();
328
329 // fill histogramm to graph
330 FillHistToGraph();
331
332 // fGraph->Print();
333 switch (fInterpolMethod) {
334
335 case kSpline0:
336 // use original histogram as reference
337 // this is useful, eg, for discrete variables
338 fUseHistogram = kTRUE;
339 break;
340
341 case kSpline1:
342 fSpline = new TMVA::TSpline1( "spline1", fGraph );
343 break;
344
345 case kSpline2:
346 fSpline = new TMVA::TSpline2( "spline2", fGraph );
347 break;
348
349 case kSpline3:
350 fSpline = new TSpline3( "spline3", fGraph );
351 break;
352
353 case kSpline5:
354 fSpline = new TSpline5( "spline5", fGraph );
355 break;
356
357 default:
358 Log() << kWARNING << "No valid interpolation method given! Use Spline2" << Endl;
359 fSpline = new TMVA::TSpline2( "spline2", fGraph );
360 Log() << kFATAL << " Well.. .thinking about it, I better quit so you notice you are forced to fix the mistake " << Endl;
361 std::exit(1);
362 }
363 // fill into histogram
364 FillSplineToHist();
365
366 if (!UseHistogram()) {
367 fSpline->SetTitle( (TString)fHist->GetTitle() + fSpline->GetTitle() );
368 fSpline->SetName ( (TString)fHist->GetName() + fSpline->GetName() );
369 }
370
371
372 // sanity check
373 Double_t integral = GetIntegral();
374 if (integral < 0) Log() << kFATAL << "Integral: " << integral << " <= 0" << Endl;
375
376 // normalize
377 if (fNormalize)
378 if (integral>0) fPDFHist->Scale( 1.0/integral );
379
380 fPDFHist->SetDirectory(nullptr);
381}
382
383////////////////////////////////////////////////////////////////////////////////
384/// creates high-binned reference histogram to be used instead of the
385/// PDF for speed reasons
386
388{
389 fPDFHist = new TH1F( "", "", fgNbin_PdfHist, GetXmin(), GetXmax() );
390 fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_KDE" );
391 fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_KDE" );
392
393 // create the kernel object
394 Float_t histoLowEdge = fHist->GetBinLowEdge(1);
395 Float_t histoUpperEdge = fPDFHist->GetBinLowEdge(fPDFHist->GetNbinsX()) + fPDFHist->GetBinWidth(fPDFHist->GetNbinsX());
396 KDEKernel *kern = new TMVA::KDEKernel( fKDEiter,
397 fHist, histoLowEdge, histoUpperEdge,
398 fKDEborder, fFineFactor );
399 kern->SetKernelType(fKDEtype);
400
401 for (Int_t i=1;i<fHist->GetNbinsX();i++) {
402 // loop over the bins of the original histo
403 for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
404 // loop over the bins of the new histo and fill it
405 fPDFHist->AddBinContent(j,fHist->GetBinContent(i)*
406 kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
407 fPDFHist->GetBinLowEdge(j+1),
408 fHist->GetBinCenter(i),
409 i)
410 );
411 }
412 if (fKDEborder == 3) { // mirror the samples and fill them again
413 // in order to save time do the mirroring only for the first (the lower) 1/5 of the histo to the left;
414 // and the last (the higher) 1/5 of the histo to the right.
415 // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
416 if (i < fHist->GetNbinsX()/5 ) { // the first (the lower) 1/5 of the histo
417 for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
418 // loop over the bins of the PDF histo and fill it
419 fPDFHist->AddBinContent(j,fHist->GetBinContent(i)*
420 kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
421 fPDFHist->GetBinLowEdge(j+1),
422 2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
423 i)
424 );
425 }
426 }
427 if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
428 for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
429 // loop over the bins of the PDF histo and fill it
430 fPDFHist->AddBinContent( j,fHist->GetBinContent(i)*
431 kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
432 fPDFHist->GetBinLowEdge(j+1),
433 2*histoUpperEdge-fHist->GetBinCenter(i), i) );
434 }
435 }
436 }
437 }
438
439 fPDFHist->SetEntries(fHist->GetEntries());
440
441 delete kern;
442
443 // sanity check
444 Double_t integral = GetIntegral();
445 if (integral < 0) Log() << kFATAL << "Integral: " << integral << " <= 0" << Endl;
446
447 // normalize
448 if (fNormalize)
449 if (integral>0) fPDFHist->Scale( 1.0/integral );
450 fPDFHist->SetDirectory(nullptr);
451}
452
453////////////////////////////////////////////////////////////////////////////////
454
456{
457 if(fHist->GetNbinsX()==1) return;
458 if (fMaxNsmooth == fMinNsmooth) {
459 fHist->Smooth( fMinNsmooth );
460 return;
461 }
462
463 //calculating Mean, RMS of the relative errors and using them to set
464 //the boundaries of the linear transformation
465 Float_t Err=0, ErrAvg=0, ErrRMS=0 ; Int_t num=0, smooth;
466 for (Int_t bin=0; bin<fHist->GetNbinsX(); bin++) {
467 if (fHist->GetBinContent(bin+1) <= fHist->GetBinError(bin+1)) continue;
468 Err = fHist->GetBinError(bin+1) / fHist->GetBinContent(bin+1);
469 ErrAvg += Err; ErrRMS += Err*Err; num++;
470 }
471 ErrAvg /= num;
472 ErrRMS = TMath::Sqrt(ErrRMS/num-ErrAvg*ErrAvg) ;
473
474 //linearly convent the histogram to a vector of smoothnum
475 Float_t MaxErr=ErrAvg+ErrRMS, MinErr=ErrAvg-ErrRMS;
476 fNSmoothHist = new TH1I("","",fHist->GetNbinsX(),0,fHist->GetNbinsX());
477 fNSmoothHist->SetTitle( (TString)fHist->GetTitle() + "_Nsmooth" );
478 fNSmoothHist->SetName ( (TString)fHist->GetName() + "_Nsmooth" );
479 for (Int_t bin=0; bin<fHist->GetNbinsX(); bin++) {
480 if (fHist->GetBinContent(bin+1) <= fHist->GetBinError(bin+1))
481 smooth=fMaxNsmooth;
482 else {
483 Err = fHist->GetBinError(bin+1) / fHist->GetBinContent(bin+1);
484 smooth=(Int_t)((Err-MinErr) /(MaxErr-MinErr) * (fMaxNsmooth-fMinNsmooth)) + fMinNsmooth;
485 }
486 smooth = TMath::Max(fMinNsmooth,TMath::Min(fMaxNsmooth,smooth));
487 fNSmoothHist->SetBinContent(bin+1,smooth);
488 }
489
490 //find regions of constant smoothnum, starting from the highest amount of
491 //smoothing. So the last iteration all the histogram will be smoothed as a whole
492 for (Int_t n=fMaxNsmooth; n>=0; n--) {
493 //all the histogram has to be smoothed at least fMinNsmooth
494 if (n <= fMinNsmooth) { fHist->Smooth(); continue; }
495 Int_t MinBin=-1,MaxBin =-1;
496 for (Int_t bin=0; bin < fHist->GetNbinsX(); bin++) {
497 if (fNSmoothHist->GetBinContent(bin+1) >= n) {
498 if (MinBin==-1) MinBin = bin;
499 else MaxBin=bin;
500 }
501 else if (MaxBin >= 0) {
502 fHist->Smooth(1,"R");
503 MinBin=MaxBin=-1;
504 }
505 else // can't smooth a single bin
506 MinBin = -1;
507 }
508 }
509}
510
511////////////////////////////////////////////////////////////////////////////////
512/// Simple conversion
513
515{
516 fGraph=new TGraph(fHist);
517}
518
519////////////////////////////////////////////////////////////////////////////////
520/// creates high-binned reference histogram to be used instead of the
521/// PDF for speed reasons
522
524{
525 if (UseHistogram()) {
526 // no spline given, use the original histogram
527 fPDFHist = (TH1*)fHist->Clone();
528 fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_spline0" );
529 fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_spline0" );
530 }
531 else {
532 // create new reference histogram
533 fPDFHist = new TH1F( "", "", fgNbin_PdfHist, GetXmin(), GetXmax() );
534 fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_" + fSpline->GetTitle() );
535 fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_" + fSpline->GetTitle() );
536
537 for (Int_t bin=1; bin <= fgNbin_PdfHist; bin++) {
538 Double_t x = fPDFHist->GetBinCenter( bin );
539 Double_t y = fSpline->Eval( x );
540 // sanity correction: in cases where strong slopes exist, accidentally, the
541 // splines can go to zero; in this case we set the corresponding bin content
542 // equal to the bin content of the original histogram
543 if (y <= fgEpsilon) y = fHist->GetBinContent( fHist->FindBin( x ) );
544 fPDFHist->SetBinContent( bin, TMath::Max(y, fgEpsilon) );
545 }
546 }
547 fPDFHist->SetDirectory(nullptr);
548}
549
550////////////////////////////////////////////////////////////////////////////////
551/// sanity check: compare PDF with original histogram
552
554{
555 if (fHist == NULL) {
556 Log() << kFATAL << "<CheckHist> Called without valid histogram pointer!" << Endl;
557 }
558
559 Int_t nbins = fHist->GetNbinsX();
560
561 Int_t emptyBins=0;
562 // count number of empty bins
563 for (Int_t bin=1; bin<=nbins; bin++)
564 if (fHist->GetBinContent(bin) == 0) emptyBins++;
565
566 if (((Float_t)emptyBins/(Float_t)nbins) > 0.5) {
567 Log() << kWARNING << "More than 50% (" << (((Float_t)emptyBins/(Float_t)nbins)*100)
568 <<"%) of the bins in hist '"
569 << fHist->GetName() << "' are empty!" << Endl;
570 Log() << kWARNING << "X_min=" << GetXmin()
571 << " mean=" << fHist->GetMean() << " X_max= " << GetXmax() << Endl;
572 }
573}
574
575////////////////////////////////////////////////////////////////////////////////
576/// comparison of original histogram with reference PDF
577
578void TMVA::PDF::ValidatePDF( TH1* originalHist ) const
579{
580 // if no histogram is given, use the original one (the one the PDF was made from)
581 if (!originalHist) originalHist = fHistOriginal;
582
583 Int_t nbins = originalHist->GetNbinsX();
584
585 // treat errors properly
586 if (originalHist->GetSumw2()->GetSize() == 0) originalHist->Sumw2();
587
588 // ---- first validation: simple(st) possible chi2 test
589 // count number of empty bins
590 Double_t chi2 = 0;
591 Int_t ndof = 0;
592 Int_t nc1 = 0; // deviation counters
593 Int_t nc2 = 0; // deviation counters
594 Int_t nc3 = 0; // deviation counters
595 Int_t nc6 = 0; // deviation counters
596 for (Int_t bin=1; bin<=nbins; bin++) {
597 Double_t x = originalHist->GetBinCenter( bin );
598 Double_t y = originalHist->GetBinContent( bin );
599 Double_t ey = originalHist->GetBinError( bin );
600
601 Int_t binPdfHist = fPDFHist->FindBin( x );
602 if (binPdfHist<0) continue; // happens only if hist-dim>3
603
604 Double_t yref = GetVal( x );
605 Double_t rref = ( originalHist->GetSumOfWeights()/fPDFHist->GetSumOfWeights() *
606 originalHist->GetBinWidth( bin )/fPDFHist->GetBinWidth( binPdfHist ) );
607
608 if (y > 0) {
609 ndof++;
610 Double_t d = TMath::Abs( (y - yref*rref)/ey );
611 // std::cout << "bin: " << bin << " val: " << x << " data(err): " << y << "(" << ey << ") pdf: "
612 // << yref << " dev(chi2): " << d << "(" << chi2 << ") rref: " << rref << std::endl;
613 chi2 += d*d;
614 if (d > 1) { nc1++; if (d > 2) { nc2++; if (d > 3) { nc3++; if (d > 6) nc6++; } } }
615 }
616 }
617
618 Log() << kDEBUG << "Validation result for PDF \"" << originalHist->GetTitle() << "\"" << ": " << Endl;
619 Log() << kDEBUG << TString::Format( " chi2/ndof(!=0) = %.1f/%i = %.2f (Prob = %.2f)",
620 chi2, ndof, chi2/ndof, TMath::Prob( chi2, ndof ) ) << Endl;
621 if ((1.0 - TMath::Prob( chi2, ndof )) > 0.9999994) {
622 Log() << kDEBUG << "Comparison of the original histogram \"" << originalHist->GetTitle() << "\"" << Endl;
623 Log() << kDEBUG << "with the corresponding PDF gave a chi2/ndof of " << chi2/ndof << "," << Endl;
624 Log() << kDEBUG << "which corresponds to a deviation of more than 5 sigma! Please check!" << Endl;
625 }
626 Log() << kDEBUG << TString::Format( " #bins-found(#expected-bins) deviating > [1,2,3,6] sigmas: " \
627 "[%i(%i),%i(%i),%i(%i),%i(%i)]",
628 nc1, Int_t(TMath::Prob(1.0,1)*ndof), nc2, Int_t(TMath::Prob(4.0,1)*ndof),
629 nc3, Int_t(TMath::Prob(9.0,1)*ndof), nc6, Int_t(TMath::Prob(36.0,1)*ndof) ) << Endl;
630}
631
632////////////////////////////////////////////////////////////////////////////////
633/// computes normalisation
634
636{
637 Double_t integral = fPDFHist->GetSumOfWeights();
638 integral *= GetPdfHistBinWidth();
639
640 return integral;
641}
642
643////////////////////////////////////////////////////////////////////////////////
644/// static external auxiliary function (integrand)
645
647{
648 return ThisPDF()->GetVal( x[0] );
649}
650
651////////////////////////////////////////////////////////////////////////////////
652/// computes PDF integral within given ranges
653
655{
656 Double_t integral = 0;
657
658 if (fgManualIntegration) {
659
660 // compute integral by summing over bins
661 Int_t imin = fPDFHist->FindBin(xmin);
662 Int_t imax = fPDFHist->FindBin(xmax);
663 if (imin < 1) imin = 1;
664 if (imax > fPDFHist->GetNbinsX()) imax = fPDFHist->GetNbinsX();
665
666 for (Int_t bini = imin; bini <= imax; bini++) {
667 Float_t dx = fPDFHist->GetBinWidth(bini);
668 // correct for bin fractions in first and last bin
669 if (bini == imin) dx = fPDFHist->GetBinLowEdge(bini+1) - xmin;
670 else if (bini == imax) dx = xmax - fPDFHist->GetBinLowEdge(bini);
671 if (dx < 0 && dx > -1.0e-8) dx = 0; // protect against float->double numerical effects
672 if (dx<0) {
673 Log() << kWARNING
674 << "dx = " << dx << std::endl
675 << "bini = " << bini << std::endl
676 << "xmin = " << xmin << std::endl
677 << "xmax = " << xmax << std::endl
678 << "imin = " << imin << std::endl
679 << "imax = " << imax << std::endl
680 << "low edge of imin" << fPDFHist->GetBinLowEdge(imin) << std::endl
681 << "low edge of imin+1" << fPDFHist->GetBinLowEdge(imin+1) << Endl;
682 Log() << kFATAL << "<GetIntegral> dx = " << dx << " < 0" << Endl;
683 }
684 integral += fPDFHist->GetBinContent(bini)*dx;
685 }
686
687 }
688 else {
689
690 // compute via Gaussian quadrature (C++ version of CERNLIB function DGAUSS)
691 if (fIGetVal == 0) fIGetVal = new TF1( "IGetVal", PDF::IGetVal, GetXmin(), GetXmax(), 0 );
692 integral = fIGetVal->Integral( xmin, xmax );
693 }
694
695 return integral;
696}
697
698////////////////////////////////////////////////////////////////////////////////
699/// returns value PDF(x)
700
702{
703 // check which is filled
704 Int_t bin = fPDFHist->FindBin(x);
705 bin = TMath::Max(bin,1);
706 bin = TMath::Min(bin,fPDFHist->GetNbinsX());
707
708 Double_t retval = 0;
709
710 if (UseHistogram()) {
711 // use directly histogram bins (this is for discrete PDFs)
712 retval = fPDFHist->GetBinContent( bin );
713 }
714 else {
715 // linear interpolation
716 Int_t nextbin = bin;
717 if ((x > fPDFHist->GetBinCenter(bin) && bin != fPDFHist->GetNbinsX()) || bin == 1)
718 nextbin++;
719 else
720 nextbin--;
721
722 // linear interpolation between adjacent bins
723 Double_t dx = fPDFHist->GetBinCenter( bin ) - fPDFHist->GetBinCenter( nextbin );
724 Double_t dy = fPDFHist->GetBinContent( bin ) - fPDFHist->GetBinContent( nextbin );
725 retval = fPDFHist->GetBinContent( bin ) + (x - fPDFHist->GetBinCenter( bin ))*dy/dx;
726 }
727
728 return TMath::Max( retval, fgEpsilon );
729}
730
731////////////////////////////////////////////////////////////////////////////////
732/// returns value \f$ PDF^{-1}(y) \f$
733
734Double_t TMVA::PDF::GetValInverse( Double_t y, Bool_t isMonotonouslyIncreasingFunction ) const
735{
736 Int_t lowerBin=0, higherBin=0;
737 Double_t lowerBinValue=0, higherBinValue=0;
738 FindBinInverse(fPDFHist,lowerBin,higherBin,lowerBinValue,higherBinValue,y,isMonotonouslyIncreasingFunction);
739
740 Double_t xValueLowerBin =fPDFHist->GetBinCenter (lowerBin);
741 Double_t xValueHigherBin=fPDFHist->GetBinCenter (higherBin);
742
743 Double_t length =(higherBinValue-lowerBinValue);
744 Double_t fraction=lowerBinValue;
745 if (length>1.0e-10)
746 fraction=(y-lowerBinValue)/length;
747
748 Double_t lengthX =xValueHigherBin-xValueLowerBin;
749 Double_t x =xValueLowerBin+lengthX*fraction;
750
751 // comparison for test purposes
752 // std::cout << "lb " << lowerBin << " hb " << higherBin << " lbv " << lowerBinValue << " hbv " << higherBinValue << " frac " << fraction << std::endl;
753 // std::cout << "y " << y << " inv x " << x << " straight y " << GetVal(x) << std::endl;
754
755 return x;
756}
757
758////////////////////////////////////////////////////////////////////////////////
759/// find bin from value on ordinate
760
761void TMVA::PDF::FindBinInverse( const TH1* histogram, Int_t& lowerBin, Int_t& higherBin, Double_t& lowerBinValue, Double_t& higherBinValue,
762 Double_t y, Bool_t isMonotonouslyIncreasingFunction ) const
763{
764 if (isMonotonouslyIncreasingFunction) {
765 higherBin=histogram->GetNbinsX();
766 lowerBin =0;
767
768 Int_t bin=higherBin/2;
769
770 while (bin>lowerBin && bin<higherBin) {
771 Double_t binContent=histogram->GetBinContent(bin);
772
773 if (y<binContent) {
774 higherBin =bin;
775 higherBinValue=binContent;
776 }
777 else if (y>=binContent){
778 lowerBin =bin;
779 lowerBinValue =binContent;
780 }
781 bin=lowerBin+(higherBin-lowerBin)/2;
782 }
783 return;
784 }
785 // search sequentially
786 for (Int_t bin=0, binEnd=histogram->GetNbinsX(); bin<binEnd; ++bin) {
787 Double_t binContent=histogram->GetBinContent(bin);
788 if (binContent<y) {
789 lowerBin =bin;
790 higherBin=bin;
791 lowerBinValue =binContent;
792 higherBinValue=binContent;
793 }
794 else {
795 higherBin=bin;
796 higherBinValue=binContent;
797 break;
798 }
799 }
800}
801
802
803////////////////////////////////////////////////////////////////////////////////
804/// define the options (their key words) that can be set in the option string
805///
806/// know options:
807///
808/// PDFInterpol[ivar] `<string>` Spline0, Spline1, Spline2 `<default>`, Spline3, Spline5, KDE used to interpolate reference histograms
809/// if no variable index is given, it is valid for ALL the variables
810///
811/// - NSmooth `<int>` how often the input histos are smoothed
812/// - MinNSmooth `<int>` min number of smoothing iterations, for bins with most data
813/// - MaxNSmooth `<int>` max number of smoothing iterations, for bins with least data
814/// - NAvEvtPerBin `<int>` minimum average number of events per PDF bin
815/// - TransformOutput `<bool>` transform (often strongly peaked) likelihood output through sigmoid inversion
816/// - fKDEtype `<KernelType>` type of the Kernel to use (1 is Gaussian)
817/// - fKDEiter `<KerneIter>` number of iterations (1 --> "static KDE", 2 --> "adaptive KDE")
818/// - fBorderMethod `<KernelBorder>` the method to take care about "border" effects (1=no treatment , 2=kernel renormalization, 3=sample mirroring)
819
821{
822 DeclareOptionRef( fNsmooth, TString::Format("NSmooth%s",fSuffix.Data()),
823 "Number of smoothing iterations for the input histograms" );
824 DeclareOptionRef( fMinNsmooth, TString::Format("MinNSmooth%s",fSuffix.Data()),
825 "Min number of smoothing iterations, for bins with most data" );
826
827 DeclareOptionRef( fMaxNsmooth, TString::Format("MaxNSmooth%s",fSuffix.Data()),
828 "Max number of smoothing iterations, for bins with least data" );
829
830 DeclareOptionRef( fHistAvgEvtPerBin, TString::Format("NAvEvtPerBin%s",fSuffix.Data()),
831 "Average number of events per PDF bin" );
832
833 DeclareOptionRef( fHistDefinedNBins, TString::Format("Nbins%s",fSuffix.Data()),
834 "Defined number of bins for the histogram from which the PDF is created" );
835
836 DeclareOptionRef( fCheckHist, TString::Format("CheckHist%s",fSuffix.Data()),
837 "Whether or not to check the source histogram of the PDF" );
838
839 DeclareOptionRef( fInterpolateString, TString::Format("PDFInterpol%s",fSuffix.Data()),
840 "Interpolation method for reference histograms (e.g. Spline2 or KDE)" );
841 AddPreDefVal(TString("Spline0")); // take histogram
842 AddPreDefVal(TString("Spline1")); // linear interpolation between bins
843 AddPreDefVal(TString("Spline2")); // quadratic interpolation
844 AddPreDefVal(TString("Spline3")); // cubic interpolation
845 AddPreDefVal(TString("Spline5")); // fifth order polynom interpolation
846 AddPreDefVal(TString("KDE")); // use kernel density estimator
847
848 DeclareOptionRef( fKDEtypeString, TString::Format("KDEtype%s",fSuffix.Data()), "KDE kernel type (1=Gauss)" );
849 AddPreDefVal(TString("Gauss"));
850
851 DeclareOptionRef( fKDEiterString, TString::Format("KDEiter%s",fSuffix.Data()), "Number of iterations (1=non-adaptive, 2=adaptive)" );
852 AddPreDefVal(TString("Nonadaptive"));
853 AddPreDefVal(TString("Adaptive"));
854
855 DeclareOptionRef( fFineFactor , TString::Format("KDEFineFactor%s",fSuffix.Data()),
856 "Fine tuning factor for Adaptive KDE: Factor to multiply the width of the kernel");
857
858 DeclareOptionRef( fBorderMethodString, TString::Format("KDEborder%s",fSuffix.Data()),
859 "Border effects treatment (1=no treatment , 2=kernel renormalization, 3=sample mirroring)" );
860 AddPreDefVal(TString("None"));
861 AddPreDefVal(TString("Renorm"));
862 AddPreDefVal(TString("Mirror"));
863
864 SetConfigName( GetName() );
865 SetConfigDescription( "Configuration options for the PDF class" );
866}
867
868////////////////////////////////////////////////////////////////////////////////
869
871{
872 if (fNsmooth < 0) fNsmooth = 0; // set back to default
873
874 if (fMaxNsmooth < 0 || fMinNsmooth < 0) { // use "Nsmooth" variable
875 fMinNsmooth = fMaxNsmooth = fNsmooth;
876 }
877
878 if (fMaxNsmooth < fMinNsmooth && fMinNsmooth >= 0) { // sanity check
879 Log() << kFATAL << "ERROR: MaxNsmooth = "
880 << fMaxNsmooth << " < MinNsmooth = " << fMinNsmooth << Endl;
881 }
882
883 if (fMaxNsmooth < 0 || fMinNsmooth < 0) {
884 Log() << kFATAL << "ERROR: MaxNsmooth = "
885 << fMaxNsmooth << " or MinNsmooth = " << fMinNsmooth << " smaller than zero" << Endl;
886 }
887
888 //processing the options
889 if (fInterpolateString == "Spline0") fInterpolMethod = TMVA::PDF::kSpline0;
890 else if (fInterpolateString == "Spline1") fInterpolMethod = TMVA::PDF::kSpline1;
891 else if (fInterpolateString == "Spline2") fInterpolMethod = TMVA::PDF::kSpline2;
892 else if (fInterpolateString == "Spline3") fInterpolMethod = TMVA::PDF::kSpline3;
893 else if (fInterpolateString == "Spline5") fInterpolMethod = TMVA::PDF::kSpline5;
894 else if (fInterpolateString == "KDE" ) fInterpolMethod = TMVA::PDF::kKDE;
895 else if (fInterpolateString != "" ) {
896 Log() << kFATAL << "unknown setting for option 'InterpolateMethod': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
897 }
898
899 // init KDE options
900 if (fKDEtypeString == "Gauss" ) fKDEtype = KDEKernel::kGauss;
901 else if (fKDEtypeString != "" )
902 Log() << kFATAL << "unknown setting for option 'KDEtype': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
903 if (fKDEiterString == "Nonadaptive") fKDEiter = KDEKernel::kNonadaptiveKDE;
904 else if (fKDEiterString == "Adaptive" ) fKDEiter = KDEKernel::kAdaptiveKDE;
905 else if (fKDEiterString != "" )// nothing more known
906 Log() << kFATAL << "unknown setting for option 'KDEiter': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
907
908 if ( fBorderMethodString == "None" ) fKDEborder= KDEKernel::kNoTreatment;
909 else if ( fBorderMethodString == "Renorm" ) fKDEborder= KDEKernel::kKernelRenorm;
910 else if ( fBorderMethodString == "Mirror" ) fKDEborder= KDEKernel::kSampleMirror;
911 else if ( fKDEiterString != "" ) { // nothing more known
912 Log() << kFATAL << "unknown setting for option 'KDEBorder': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
913 }
914}
915
916////////////////////////////////////////////////////////////////////////////////
917/// XML file writing
918
919void TMVA::PDF::AddXMLTo( void* parent )
920{
921 void* pdfxml = gTools().AddChild(parent, "PDF");
922 gTools().AddAttr(pdfxml, "Name", fPDFName );
923 gTools().AddAttr(pdfxml, "MinNSmooth", fMinNsmooth );
924 gTools().AddAttr(pdfxml, "MaxNSmooth", fMaxNsmooth );
925 gTools().AddAttr(pdfxml, "InterpolMethod", fInterpolMethod );
926 gTools().AddAttr(pdfxml, "KDE_type", fKDEtype );
927 gTools().AddAttr(pdfxml, "KDE_iter", fKDEiter );
928 gTools().AddAttr(pdfxml, "KDE_border", fKDEborder );
929 gTools().AddAttr(pdfxml, "KDE_finefactor", fFineFactor );
930 void* pdfhist = gTools().AddChild(pdfxml,"Histogram" );
931 TH1* histToWrite = GetOriginalHist();
932 Bool_t hasEquidistantBinning = gTools().HistoHasEquidistantBins(*histToWrite);
933 gTools().AddAttr(pdfhist, "Name", histToWrite->GetName() );
934 gTools().AddAttr(pdfhist, "NBins", histToWrite->GetNbinsX() );
935 gTools().AddAttr(pdfhist, "XMin", histToWrite->GetXaxis()->GetXmin() );
936 gTools().AddAttr(pdfhist, "XMax", histToWrite->GetXaxis()->GetXmax() );
937 gTools().AddAttr(pdfhist, "HasEquidistantBins", hasEquidistantBinning );
938
939 TString bincontent("");
940 for (Int_t i=0; i<histToWrite->GetNbinsX(); i++) {
941 bincontent += gTools().StringFromDouble(histToWrite->GetBinContent(i+1));
942 bincontent += " ";
943 }
944 gTools().AddRawLine(pdfhist, bincontent );
945
946 if (!hasEquidistantBinning) {
947 void* pdfhistbins = gTools().AddChild(pdfxml,"HistogramBinning" );
948 gTools().AddAttr(pdfhistbins, "NBins", histToWrite->GetNbinsX() );
949 TString binns("");
950 for (Int_t i=1; i<=histToWrite->GetNbinsX()+1; i++) {
951 binns += gTools().StringFromDouble(histToWrite->GetXaxis()->GetBinLowEdge(i));
952 binns += " ";
953 }
954 gTools().AddRawLine(pdfhistbins, binns );
955 }
956}
957
958////////////////////////////////////////////////////////////////////////////////
959/// XML file reading
960
961void TMVA::PDF::ReadXML( void* pdfnode )
962{
963 UInt_t enumint;
964
965 gTools().ReadAttr(pdfnode, "MinNSmooth", fMinNsmooth );
966 gTools().ReadAttr(pdfnode, "MaxNSmooth", fMaxNsmooth );
967 gTools().ReadAttr(pdfnode, "InterpolMethod", enumint ); fInterpolMethod = EInterpolateMethod(enumint);
968 gTools().ReadAttr(pdfnode, "KDE_type", enumint ); fKDEtype = KDEKernel::EKernelType(enumint);
969 gTools().ReadAttr(pdfnode, "KDE_iter", enumint ); fKDEiter = KDEKernel::EKernelIter(enumint);
970 gTools().ReadAttr(pdfnode, "KDE_border", enumint ); fKDEborder = KDEKernel::EKernelBorder(enumint);
971 gTools().ReadAttr(pdfnode, "KDE_finefactor", fFineFactor );
972
973 TString hname;
974 UInt_t nbins;
976 Bool_t hasEquidistantBinning;
977
978 void* histch = gTools().GetChild(pdfnode);
979 gTools().ReadAttr( histch, "Name", hname );
980 gTools().ReadAttr( histch, "NBins", nbins );
981 gTools().ReadAttr( histch, "XMin", xmin );
982 gTools().ReadAttr( histch, "XMax", xmax );
983 gTools().ReadAttr( histch, "HasEquidistantBins", hasEquidistantBinning );
984
985 // recreate the original hist
986 TH1* newhist = 0;
987 if (hasEquidistantBinning) {
988 newhist = new TH1F( hname, hname, nbins, xmin, xmax );
989 newhist->SetDirectory(nullptr);
990 const char* content = gTools().GetContent(histch);
991 std::stringstream s(content);
992 Double_t val;
993 for (UInt_t i=0; i<nbins; i++) {
994 s >> val;
995 newhist->SetBinContent(i+1,val);
996 }
997 }
998 else {
999 const char* content = gTools().GetContent(histch);
1000 std::stringstream s(content);
1001 Double_t val;
1002 void* binch = gTools().GetNextChild(histch);
1003 UInt_t nbinning;
1004 gTools().ReadAttr( binch, "NBins", nbinning );
1005 TVectorD binns(nbinning+1);
1006 if (nbinning != nbins) {
1007 Log() << kFATAL << "Number of bins in content and binning array differs"<<Endl;
1008 }
1009 const char* binString = gTools().GetContent(binch);
1010 std::stringstream sb(binString);
1011 for (UInt_t i=0; i<=nbins; i++) sb >> binns[i];
1012 newhist = new TH1F( hname, hname, nbins, binns.GetMatrixArray() );
1013 newhist->SetDirectory(nullptr);
1014 for (UInt_t i=0; i<nbins; i++) {
1015 s >> val;
1016 newhist->SetBinContent(i+1,val);
1017 }
1018 }
1019
1020 TString hnameSmooth = hname;
1021 hnameSmooth.ReplaceAll( "_original", "_smoothed" );
1022
1023 if (fHistOriginal != 0) delete fHistOriginal;
1024 fHistOriginal = newhist;
1025 fHist = (TH1F*)fHistOriginal->Clone( hnameSmooth );
1026 fHist->SetTitle( hnameSmooth );
1027 fHist->SetDirectory(nullptr);
1028
1029 if (fInterpolMethod == PDF::kKDE) BuildKDEPDF();
1030 else BuildSplinePDF();
1031}
1032
1033////////////////////////////////////////////////////////////////////////////////
1034/// write the pdf
1035
1036std::ostream& TMVA::operator<< ( std::ostream& os, const PDF& pdf )
1037{
1038 Int_t dp = os.precision();
1039 os << "MinNSmooth " << pdf.fMinNsmooth << std::endl;
1040 os << "MaxNSmooth " << pdf.fMaxNsmooth << std::endl;
1041 os << "InterpolMethod " << pdf.fInterpolMethod << std::endl;
1042 os << "KDE_type " << pdf.fKDEtype << std::endl;
1043 os << "KDE_iter " << pdf.fKDEiter << std::endl;
1044 os << "KDE_border " << pdf.fKDEborder << std::endl;
1045 os << "KDE_finefactor " << pdf.fFineFactor << std::endl;
1046
1047 TH1* histToWrite = pdf.GetOriginalHist();
1048
1049 const Int_t nBins = histToWrite->GetNbinsX();
1050
1051 // note: this is a schema change introduced for v3.7.3
1052 os << "Histogram "
1053 << histToWrite->GetName()
1054 << " " << nBins // nbins
1055 << " " << std::setprecision(12) << histToWrite->GetXaxis()->GetXmin() // x_min
1056 << " " << std::setprecision(12) << histToWrite->GetXaxis()->GetXmax() // x_max
1057 << std::endl;
1058
1059 // write the smoothed hist
1060 os << "Weights " << std::endl;
1061 os << std::setprecision(8);
1062 for (Int_t i=0; i<nBins; i++) {
1063 os << std::setw(15) << std::left << histToWrite->GetBinContent(i+1) << std::right << " ";
1064 if ((i+1)%5==0) os << std::endl;
1065 }
1066
1067 os << std::setprecision(dp);
1068 return os; // Return the output stream.
1069}
1070
1071////////////////////////////////////////////////////////////////////////////////
1072/// read the tree from an std::istream
1073
1074std::istream& TMVA::operator>> ( std::istream& istr, PDF& pdf )
1075{
1076 TString devnullS;
1077 Int_t valI;
1078 Int_t nbins=-1; // default binning will cause an exit
1079 Float_t xmin=-1., xmax=-1.;
1080 TString hname="_original";
1081 Bool_t doneReading = kFALSE;
1082 while (!doneReading) {
1083 istr >> devnullS;
1084 if (devnullS=="NSmooth")
1085 {istr >> pdf.fMinNsmooth; pdf.fMaxNsmooth=pdf.fMinNsmooth;}
1086 else if (devnullS=="MinNSmooth") istr >> pdf.fMinNsmooth;
1087 else if (devnullS=="MaxNSmooth") istr >> pdf.fMaxNsmooth;
1088 // have to do this with strings to be more stable with developing code
1089 else if (devnullS == "InterpolMethod") { istr >> valI; pdf.fInterpolMethod = PDF::EInterpolateMethod(valI);}
1090 else if (devnullS == "KDE_type") { istr >> valI; pdf.fKDEtype = KDEKernel::EKernelType(valI); }
1091 else if (devnullS == "KDE_iter") { istr >> valI; pdf.fKDEiter = KDEKernel::EKernelIter(valI);}
1092 else if (devnullS == "KDE_border") { istr >> valI; pdf.fKDEborder = KDEKernel::EKernelBorder(valI);}
1093 else if (devnullS == "KDE_finefactor") {
1094 istr >> pdf.fFineFactor;
1095 if (pdf.GetReadingVersion() != 0 && pdf.GetReadingVersion() < TMVA_VERSION(3,7,3)) { // here we expect the histogram limits if the version is below 3.7.3. When version == 0, the newest TMVA version is assumed.
1096 // coverity[tainted_data_argument]
1097 istr >> nbins >> xmin >> xmax;
1098 doneReading = kTRUE;
1099 }
1100 }
1101 else if (devnullS == "Histogram") { istr >> hname >> nbins >> xmin >> xmax; }
1102 else if (devnullS == "Weights") { doneReading = kTRUE; }
1103 }
1104
1105 TString hnameSmooth = hname;
1106 hnameSmooth.ReplaceAll( "_original", "_smoothed" );
1107
1108 // recreate the original hist
1109 if (nbins==-1) {
1110 std::cout << "PDF, trying to create a histogram without defined binning"<< std::endl;
1111 std::exit(1);
1112 }
1113 TH1* newhist = new TH1F( hname,hname, nbins, xmin, xmax );
1114 newhist->SetDirectory(nullptr);
1115 Float_t val;
1116 for (Int_t i=0; i<nbins; i++) {
1117 istr >> val;
1118 newhist->SetBinContent(i+1,val);
1119 }
1120
1121 if (pdf.fHistOriginal != 0) delete pdf.fHistOriginal;
1122 pdf.fHistOriginal = newhist;
1123 pdf.fHist = (TH1F*)pdf.fHistOriginal->Clone( hnameSmooth );
1124 pdf.fHist->SetTitle( hnameSmooth );
1125 pdf.fHist->SetDirectory(nullptr);
1126
1127 if (pdf.fMinNsmooth>=0) pdf.BuildSplinePDF();
1128 else {
1129 pdf.fInterpolMethod = PDF::kKDE;
1130 pdf.BuildKDEPDF();
1131 }
1132
1133 return istr;
1134}
1135
1137{
1138 // return global "this" pointer of PDF
1139 return GetThisPdfThreadLocal();
1140}
const Handle_t kNone
Definition GuiTypes.h:88
#define d(i)
Definition RSha256.hxx:102
bool Bool_t
Definition RtypesCore.h:63
int Int_t
Definition RtypesCore.h:45
float Float_t
Definition RtypesCore.h:57
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
double Double_t
Definition RtypesCore.h:59
constexpr Bool_t kTRUE
Definition RtypesCore.h:93
#define ClassImp(name)
Definition Rtypes.h:377
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
#define TMVA_VERSION(a, b, c)
Definition Version.h:48
Int_t GetSize() const
Definition TArray.h:47
Double_t GetXmax() const
Definition TAxis.h:140
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition TAxis.cxx:518
Double_t GetXmin() const
Definition TAxis.h:139
1-Dim function class
Definition TF1.h:233
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:622
1-D histogram with an int per channel (see TH1 documentation)
Definition TH1.h:540
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:59
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
Definition TH1.cxx:8928
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition TH1.cxx:9132
virtual Double_t GetBinError(Int_t bin) const
Return value of error associated to bin number bin.
Definition TH1.cxx:9054
TAxis * GetXaxis()
Definition TH1.h:324
virtual Int_t GetNbinsX() const
Definition TH1.h:297
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition TH1.cxx:9213
virtual Double_t GetEntries() const
Return the current number of entries.
Definition TH1.cxx:4423
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition TH1.cxx:5052
virtual TArrayD * GetSumw2()
Definition TH1.h:313
virtual Double_t GetBinWidth(Int_t bin) const
Return bin width for 1D histogram.
Definition TH1.cxx:9154
TObject * Clone(const char *newname="") const override
Make a complete copy of the underlying object.
Definition TH1.cxx:2752
virtual Double_t GetSumOfWeights() const
Return the sum of weights excluding under/overflows.
Definition TH1.cxx:7908
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
Definition TH1.cxx:9011
KDE Kernel for "smoothing" the PDFs.
Definition KDEKernel.h:50
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
static const Bool_t fgManualIntegration
manual integration (sum over bins) or DGAUSS
Definition PDF.h:162
KDEKernel::EKernelType fKDEtype
Kernel type to use for KDE.
Definition PDF.h:188
void FillHistToGraph()
Simple conversion.
Definition PDF.cxx:514
void BuildPDF(const TH1 *theHist)
Definition PDF.cxx:260
KDEKernel::EKernelBorder fKDEborder
The method to take care about "border" effects (string)
Definition PDF.h:190
static Double_t IGetVal(Double_t *, Double_t *)
static external auxiliary function (integrand)
Definition PDF.cxx:646
TString fBorderMethodString
Definition PDF.h:185
Int_t fMaxNsmooth
Max number of smoothing iterations.
Definition PDF.h:169
void ValidatePDF(TH1 *original=nullptr) const
comparison of original histogram with reference PDF
Definition PDF.cxx:578
TMVA::PDF::EInterpolateMethod fInterpolMethod
interpolation method
Definition PDF.h:172
void SmoothHistogram()
Definition PDF.cxx:455
Double_t GetValInverse(Double_t y, Bool_t isMonotonouslyIncreasingFunction=kFALSE) const
returns value
Definition PDF.cxx:734
void CheckHist() const
sanity check: compare PDF with original histogram
Definition PDF.cxx:553
void FindBinInverse(const TH1 *histogram, Int_t &lowerBin, Int_t &higherBin, Double_t &lowerBinValue, Double_t &higherBinValue, Double_t y, Bool_t isMonotonouslyIncreasingFunction=kFALSE) const
find bin from value on ordinate
Definition PDF.cxx:761
Int_t fHistDefinedNBins
source hist bin num set by user
Definition PDF.h:181
TString fKDEiterString
Definition PDF.h:184
void ReadXML(void *pdfnode)
XML file reading.
Definition PDF.cxx:961
TString fInterpolateString
Definition PDF.h:186
Int_t fNsmooth
Min number of smoothing iterations.
Definition PDF.h:167
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition PDF.cxx:820
void AddXMLTo(void *parent)
XML file writing.
Definition PDF.cxx:919
void BuildSplinePDF()
build the PDF from the original histograms
Definition PDF.cxx:321
void FillSplineToHist()
creates high-binned reference histogram to be used instead of the PDF for speed reasons
Definition PDF.cxx:523
void BuildKDEPDF()
creates high-binned reference histogram to be used instead of the PDF for speed reasons
Definition PDF.cxx:387
Int_t fMinNsmooth
Min number of smoothing iterations.
Definition PDF.h:168
Int_t GetHistNBins(Int_t evtNum=0)
Definition PDF.cxx:304
static const Double_t fgEpsilon
minimum PDF return
Definition PDF.h:163
TH1 * GetOriginalHist() const
Definition PDF.h:94
Float_t fFineFactor
fine tuning factor for Adaptive KDE
Definition PDF.h:191
static PDF *& GetThisPdfThreadLocal()
Definition PDF.h:205
virtual ~PDF()
Definition PDF.cxx:246
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition PDF.cxx:701
Int_t fHistAvgEvtPerBin
avg event per source hist bin
Definition PDF.h:180
Double_t GetIntegral() const
computes normalisation
Definition PDF.cxx:635
Bool_t fCheckHist
check of source histogram
Definition PDF.h:195
MsgLogger * fLogger
! message logger
Definition PDF.h:199
TString fKDEtypeString
strings used to read definitions
Definition PDF.h:183
void ProcessOptions()
Definition PDF.cxx:870
EInterpolateMethod
Definition PDF.h:70
@ kKDE
Definition PDF.h:70
@ kSpline1
Definition PDF.h:70
@ kSpline0
Definition PDF.h:70
@ kSpline3
Definition PDF.h:70
@ kSpline2
Definition PDF.h:70
@ kSpline5
Definition PDF.h:70
static const Int_t fgNbin_PdfHist
number of bins in high-binned reference histogram
Definition PDF.h:161
PDF(const TString &name, Bool_t norm=kTRUE)
default constructor needed for ROOT I/O
Definition PDF.cxx:69
KDEKernel::EKernelIter fKDEiter
Number of iterations (adaptive or not)
Definition PDF.h:189
static PDF * ThisPDF(void)
Definition PDF.cxx:1136
Linear interpolation of TGraph.
Definition TSpline1.h:43
Quadratic interpolation of TGraph.
Definition TSpline2.h:43
Bool_t HistoHasEquidistantBins(const TH1 &h)
Definition Tools.cxx:1486
Bool_t AddRawLine(void *node, const char *raw)
XML helpers.
Definition Tools.cxx:1190
TString StringFromDouble(Double_t d)
string tools
Definition Tools.cxx:1233
const char * GetContent(void *node)
XML helpers.
Definition Tools.cxx:1174
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1150
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1124
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1162
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
const char * GetTitle() const override
Returns title of object.
Definition TNamed.h:48
Class to create third splines to interpolate knots Arbitrary conditions can be introduced for first a...
Definition TSpline.h:182
Class to create quintic natural splines to interpolate knots Arbitrary conditions can be introduced f...
Definition TSpline.h:238
Basic string class.
Definition TString.h:139
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition TString.h:704
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2378
Element * GetMatrixArray()
Definition TVectorT.h:76
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
Double_t ey[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
std::istream & operator>>(std::istream &istr, BinaryTree &tree)
Tools & gTools()
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:250
Double_t Prob(Double_t chi2, Int_t ndf)
Computation of the probability for a certain Chi-squared (chi2) and number of degrees of freedom (ndf...
Definition TMath.cxx:637
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:662
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:198
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:123