RuleFitParams.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : RuleFitParams                                                         *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation                                                            *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA      *
 *      Helge Voss         <Helge.Voss@cern.ch>         - MPI-KP Heidelberg, Ger. *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *      Iowa State U.                                                             *
 *      MPI-K Heidelberg, Germany                                                 *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#include "TMVA/RuleFitParams.h"

#include "TMVA/DataSetInfo.h"
#include "TMVA/MethodRuleFit.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/RuleEnsemble.h"
#include "TMVA/RuleFit.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

#include "TTree.h"
#include "TMath.h"

#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <functional>

Bool_t gFIRSTTST = kTRUE;
Bool_t gFIRSTORG = kTRUE;

Double_t gGDInit     = 0;
Double_t gGDPtr      = 0;
Double_t gGDNorm     = 0;
Double_t gGDEval     = 0;
Double_t gGDEvalRule = 0;
Double_t gGDRuleLoop = 0;
Double_t gGDLinLoop  = 0;

////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::RuleFitParams::RuleFitParams()
   : fRuleFit ( 0 )
   , fRuleEnsemble ( 0 )
   , fNRules ( 0 )
   , fNLinear ( 0 )
   , fPathIdx1 ( 0 )
   , fPathIdx2 ( 0 )
   , fPerfIdx1 ( 0 )
   , fPerfIdx2 ( 0 )
   , fGDNTauTstOK( 0 )
   , fGDNTau ( 51 )
   , fGDTauPrec ( 0.02 )
   , fGDTauScan ( 1000 )
   , fGDTauMin ( 0.0 )
   , fGDTauMax ( 1.0 )
   , fGDTau ( -1.0 )
   , fGDPathStep ( 0.01 )
   , fGDNPathSteps ( 1000 )
   , fGDErrScale ( 1.1 )
   , fAverageTruth( 0 )
   , fFstarMedian ( 0 )
   , fGDNtuple ( 0 )
   , fNTRisk ( 0 )
   , fNTErrorRate ( 0 )
   , fNTNuval ( 0 )
   , fNTCoefRad ( 0 )
   , fNTOffset ( 0 )
   , fNTCoeff ( 0 )
   , fNTLinCoeff ( 0 )
   , fsigave( 0 )
   , fsigrms( 0 )
   , fbkgave( 0 )
   , fbkgrms( 0 )
   , fLogger( new MsgLogger("RuleFit") )
{
   Init();
}
////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::RuleFitParams::~RuleFitParams()
{
   // the coefficient arrays are allocated with new[] in InitNtuple() -> delete[]
   if (fNTCoeff)    { delete [] fNTCoeff;    fNTCoeff = 0; }
   if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = 0; }
   delete fLogger;
}

////////////////////////////////////////////////////////////////////////////////
/// Initializes all parameters using the RuleEnsemble and the training tree

void TMVA::RuleFitParams::Init()
{
   if (fRuleFit==0) return;
   if (fRuleFit->GetMethodRuleFit()==0) {
      Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
   }
   UInt_t neve = fRuleFit->GetTrainingEvents().size();
   //
   fNRules  = fRuleEnsemble->GetNRules();
   fNLinear = fRuleEnsemble->GetNLinear();

   //
   // The fraction of events used for validation should be close to unity.
   // Always select from the END.
   //
   UInt_t ofs;
   fPerfIdx1 = 0;
   if (neve>1) {
      fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
   }
   else {
      fPerfIdx2 = 0;
   }
   ofs = neve - fPerfIdx2 - 1;
   fPerfIdx1 += ofs;
   fPerfIdx2 += ofs;
   //
   // The fraction of events used for the path search may be smaller, say 0.5.
   // Always select events from the BEGINNING.
   // This means that the validation and search samples will not overlap if both fractions are <0.5.
   //
   fPathIdx1 = 0;
   if (neve>1) {
      fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
   }
   else {
      fPathIdx2 = 0;
   }
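   //
   // Example (derived from the two blocks above): with neve=1000,
   // GDValidEveFrac=0.5 and GDPathEveFrac=0.5, one gets fPerfIdx = [500,999]
   // (taken from the END) and fPathIdx = [0,499] (taken from the BEGINNING),
   // i.e. disjoint subsamples.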
   //
   // summarize weights
   //
   fNEveEffPath = 0;
   for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
      fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
   }

   fNEveEffPerf = 0;
   for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
      fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
   }
   //
   Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPath << Endl;
   Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPerf << Endl;
   //
   if (fRuleEnsemble->DoRules())
      Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
   else
      Log() << kDEBUG << "Rules are disabled " << Endl;

   if (fRuleEnsemble->DoLinear())
      Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
   else
      Log() << kDEBUG << "Linear terms are disabled " << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// initializes the ntuple

void TMVA::RuleFitParams::InitNtuple()
{
   fGDNtuple= new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
   fGDNtuple->Branch("risk",    &fNTRisk,      "risk/D");
   fGDNtuple->Branch("error",   &fNTErrorRate, "error/D");
   fGDNtuple->Branch("nuval",   &fNTNuval,     "nuval/D");
   fGDNtuple->Branch("coefrad", &fNTCoefRad,   "coefrad/D");
   fGDNtuple->Branch("offset",  &fNTOffset,    "offset/D");
   //
   fNTCoeff    = (fNRules >0 ? new Double_t[fNRules]  : 0);
   fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);

   for (UInt_t i=0; i<fNRules; i++) {
      fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
   }
}

////////////////////////////////////////////////////////////////////////////////
/// evaluate the average of each variable and f(x) in the given range

void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
                                           std::vector<Double_t> &avsel,
                                           std::vector<Double_t> &avrul )
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
   }
   //
   avsel.clear();
   avrul.clear();
   //
   if (fNLinear>0) avsel.resize(fNLinear,0);
   if (fNRules>0)  avrul.resize(fNRules,0);
   const std::vector<UInt_t> *eventRuleMap=0;
   Double_t ew;
   Double_t sumew=0;
   //
   // Loop over events and calculate average of linear terms (normalised) and rule response.
   //
   if (fRuleEnsemble->IsRuleMapOK()) { // MakeRuleMap() has been called
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
         }
         // loop over rules
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         for (UInt_t r=0; r<nrules; r++) {
            avrul[(*eventRuleMap)[r]] += ew;
         }
      }
   }
   else { // MakeRuleMap() has not yet been called
      const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         // first cache rule/lin response
         /* Double_t val = */ fRuleEnsemble->EvalLinEvent(*((*events)[i]));
         /* val = */ fRuleEnsemble->EvalEvent(*((*events)[i]));
         // loop over linear terms
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
         }
         // loop over rules
         for (UInt_t r=0; r<fNRules; r++) {
            avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
         }
      }
   }
   // average variable
   for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
      avsel[sel] = avsel[sel] / sumew;
   }
   // average rule response, excl coeff
   for (UInt_t r=0; r<fNRules; r++) {
      avrul[r] = avrul[r] / sumew;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).
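///
/// In formula form (a transcription of the code below):
/// \f$ L(y,F) = w \left( y - \max(-1,\min(1,F(x))) \right)^2 \f$,
/// i.e. the model response is clamped ("ramped") to [-1,1] before the
/// weighted squared error is taken.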

Double_t TMVA::RuleFitParams::LossFunction( const Event& e ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
   //
   return diff*diff*e.GetWeight();
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau ) const
{
   Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment
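///
/// Computed below as the weighted average loss over the event range:
/// \f$ R = \frac{1}{N_{eff}} \sum_{i=ind1}^{ind2} L_i \f$,
/// where \f$ N_{eff} \f$ = neff is the effective (weighted) number of events.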

Double_t TMVA::RuleFitParams::Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment for tau model <itau>

Double_t TMVA::RuleFitParams::Risk(UInt_t ind1, UInt_t ind2, Double_t neff, UInt_t itau) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i,itau);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// This is the "lasso" penalty
/// To be used for regression.
/// --- NOT USED ---
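///
/// The returned value is the L1 norm of all coefficients, as coded below:
/// \f$ P = \sum_r |a_r| + \sum_l |b_l| \f$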

Double_t TMVA::RuleFitParams::Penalty() const
{
   Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
   Double_t rval=0;
   const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
   for (UInt_t i=0; i<fNRules; i++) {
      rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      rval += TMath::Abs((*lincoeff)[i]);
   }
   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// Initialize GD path search

void TMVA::RuleFitParams::InitGD()
{
   if (fGDNTau<2) {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   if (fGDTau<0.0) {
      // fGDNTau = 50; already set in MethodRuleFit
      fGDTauScan = 1000;
      fGDTauMin  = 0.0;
      fGDTauMax  = 1.0;
   }
   else {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   // set all taus
   fGDTauVec.clear();
   fGDTauVec.resize( fGDNTau );
   if (fGDNTau==1) {
      fGDTauVec[0] = fGDTau;
   }
   else {
      // set tau vector - TODO: make a smarter choice of range in tau
      Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
         if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
      }
   }
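   // Example: with the defaults fGDNTau=51, fGDTauMin=0 and fGDTauMax=1,
   // dtau = 1/50 = 0.02 and the grid becomes tau = 0.00, 0.02, ..., 1.00.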
   // initialize path search vectors

   fGradVec.clear();
   fGradVecLin.clear();
   fGradVecTst.clear();
   fGradVecLinTst.clear();
   fGDErrTst.clear();
   fGDErrTstOK.clear();
   fGDOfsTst.clear();
   fGDCoefTst.clear();
   fGDCoefLinTst.clear();
   //
   // rules
   //
   fGDCoefTst.resize(fGDNTau);
   fGradVec.resize(fNRules,0);
   fGradVecTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecTst[i].resize(fNRules,0);
      fGDCoefTst[i].resize(fNRules,0);
   }
   //
   // linear terms
   //
   fGDCoefLinTst.resize(fGDNTau);
   fGradVecLin.resize(fNLinear,0);
   fGradVecLinTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecLinTst[i].resize(fNLinear,0);
      fGDCoefLinTst[i].resize(fNLinear,0);
   }
   //
   // error, coefs etc
   //
   fGDErrTst.resize(fGDNTau,0);
   fGDErrTstOK.resize(fGDNTau,kTRUE);
   fGDOfsTst.resize(fGDNTau,0);
   fGDNTauTstOK = fGDNTau;
   //
   // calculate average selectors and rule responses for the path sample size
   //
}

////////////////////////////////////////////////////////////////////////////////
/// This finds the cutoff parameter tau by scanning several different paths

Int_t TMVA::RuleFitParams::FindGDTau()
{
   if (fGDNTau<2) return 0;
   if (fGDTauScan==0) return 0;

   if (fGDOfsTst.size()<1)
      Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
   //
   Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
   //
   // Find how many points to scan and how often to calculate the error
   UInt_t nscan = fGDTauScan; //std::min(static_cast<Int_t>(fGDTauScan),fGDNPathSteps);
   UInt_t netst = std::min(nscan,UInt_t(100));
   UInt_t nscanned=0;
   //
   //--------------------
   // loop over the paths
   //--------------------
   // The MAXIMUM number of loops is given by nscan.
   // At each loop, the paths that are far away from the minimum
   // are rejected. Hence at each check (every netst events), the number
   // of paths searched will be reduced.
   // The maximum 'distance' from the minimum error rate is
   // 1 sigma. See RiskPerfTst() for details.
   //
   Bool_t doloop=kTRUE;
   UInt_t ip=0;
   UInt_t itauMin=0;
   Timer timer( nscan, "RuleFit" );
   while (doloop) {
      // make gradvec
      MakeTstGradientVector();
      // update coefs
      UpdateTstCoefficients();
      // estimate error and do the sum
      // do this at index=0, netst-1, 2*netst-1 ...
      nscanned++;
      if ( (ip==0) || ((ip+1)%netst==0) ) {
         // ErrorRateRocTst( );
         itauMin = RiskPerfTst();
         Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
               << " => error rate = " << fGDErrTst[itauMin] << Endl;
      }
      ip++;
      doloop = ((ip<nscan) && (fGDNTauTstOK>3));
      gFIRSTTST = kFALSE;
      if (Log().GetMinType()>kVERBOSE)
         timer.DrawProgressBar(ip);
   }
   //
   // Set tau and coefs
   // Downscale tau slightly in order to avoid numerical problems
   //
   if (nscanned==0) {
      Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
   }
   fGDTau = fGDTauVec[itauMin];
   fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
   fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
   fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
   Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
         << " after " << timer.GetElapsedTime() << " " << Endl;

   return nscan;
}

////////////////////////////////////////////////////////////////////////////////
/// The following finds the gradient directed path in parameter space.
/// More work is needed... FT, 24/9/2006
/// The algorithm is currently as follows:
/// <***if not otherwise stated, the sample used below is [fPathIdx1,fPathIdx2]***>
/// 1. Set offset to -average(y(true)) and all coefs=0 => average of F(x)==0
/// 2. FindGDTau() : start scanning using several paths defined by different tau;
///    choose the tau yielding the best path
/// 3. start scanning the chosen path
/// 4. check error rate at a given frequency;
///    data used for check: [fPerfIdx1,fPerfIdx2]
/// 5. stop when either of the following conditions is fulfilled:
///    a. loop index==fGDNPathSteps
///    b. error > fGDErrScale*errmin
///    c. only in DEBUG mode: risk is not monotonically decreasing
///
/// The algorithm will warn if:
/// I.   the error rate was still decreasing when the loop finished -> increase fGDNPathSteps!
/// II.  the minimum was found at an early stage -> decrease fGDPathStep
/// III. DEBUG: risk > previous risk -> entered chaotic region (regularization is too small)
///
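/// A compact sketch of one path step (see MakeGradientVector() and
/// UpdateCoefficients() for the actual code): compute the risk gradient g,
/// take gmax = max_k |g_k|, and for every coefficient with |g_k| >= tau*gmax
/// update a_k += fGDPathStep*g_k. Loosely speaking, a small tau moves many
/// coefficients per step (ridge-like), while tau close to 1 moves only the
/// steepest ones (lasso-like).
///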

void TMVA::RuleFitParams::MakeGDPath()
{
   Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
         << Endl;
   Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
   Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;

   // check if debug mode
   const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
   const Bool_t isDebug   = (Log().GetMinType()<=kDEBUG);

   // init GD parameters and clear coeff vectors
   InitGD();

   // evaluate average response of rules/linear terms (with event weights)
   EvaluateAveragePath();
   EvaluateAveragePerf();

   // initial estimate; all other a(i) are zero
   Log() << kVERBOSE << "Creating GD path"  << Endl;
   Log() << kVERBOSE << "  N(steps)     = " << fGDNPathSteps << Endl;
   Log() << kVERBOSE << "  step         = " << fGDPathStep << Endl;
   Log() << kVERBOSE << "  N(tau)       = " << fGDNTau << Endl;
   Log() << kVERBOSE << "  N(tau steps) = " << fGDTauScan << Endl;
   Log() << kVERBOSE << "  tau range    = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;

   // init ntuple
   if (isDebug) InitNtuple();

   // DEBUG: risk scan
   Int_t nbadrisk=0;    // number of points where risk(i+1)>risk(i)
   Double_t trisk=0;    // time per risk evaluation
   Double_t strisk=0;   // total time
   Double_t rprev=1e32; // previous risk

   // parameters set at point with min error
   Double_t errmin=1e32; // min error
   // Double_t riskMin=0; // risk
   Int_t indMin=-1;      // index
   std::vector<Double_t> coefsMin;    // rule coefs
   std::vector<Double_t> lincoefsMin; // linear coefs
   Double_t offsetMin;   // offset


   // DEBUG: timing
   clock_t t0=0;
   Double_t tgradvec;
   Double_t tupgrade;
   Double_t tperf;
   Double_t stgradvec=0;
   Double_t stupgrade=0;
   Double_t stperf=0;

   // linear regression to estimate slope of error rate evolution
   const UInt_t npreg=5;
   std::vector<Double_t> valx;
   std::vector<Double_t> valy;
   std::vector<Double_t> valxy;

   // loop related
   Bool_t docheck;         // true if an error rate check is to be done
   Int_t iloop=0;          // loop index
   Bool_t found=kFALSE;    // true if minimum is found
   Bool_t riskFlat=kFALSE; // DEBUG: flag is true if risk evolution behaves badly
   Bool_t done = kFALSE;   // flag that the scan is done

   // calculate how often to check error rate
   int imod = fGDNPathSteps/100;
   if (imod<100) imod = std::min(100,fGDNPathSteps);
   if (imod>100) imod=100;
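   // Net effect of the three lines above: imod = min(100, fGDNPathSteps),
   // i.e. the error rate is checked at most every 100 steps.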

   // reset coefficients
   fAverageTruth = -CalcAverageTruth();
   offsetMin     = fAverageTruth;
   fRuleEnsemble->SetOffset(offsetMin);
   fRuleEnsemble->ClearCoefficients(0);
   fRuleEnsemble->ClearLinCoefficients(0);
   for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
      fGDOfsTst[i] = offsetMin;
   }
   Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;

   // find the best tau - returns the number of steps performed in scan
   Int_t nprescan = FindGDTau();

   //
   //
   // calculate F*
   //
   // CalcFStar(fPerfIdx1, fPerfIdx2);
   //

   // set some ntuple values
   fNTRisk = rprev;
   fNTCoefRad = -1.0;
   fNTErrorRate = 0;

   // a local flag indicating for what reason the search was stopped
   Int_t stopCondition=0;

   Log() << kINFO << "Fitting model..." << Endl;
   // start loop with timer
   Timer timer( fGDNPathSteps, "RuleFit" );
   Log() << kWARNING;
   while (!done) {
      // Make gradient vector (eq 44, ref 1)
      if (isVerbose) t0 = clock();
      MakeGradientVector();
      if (isVerbose) {
         tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stgradvec += tgradvec;
      }

      // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
      if (isVerbose) t0 = clock();
      UpdateCoefficients();
      if (isVerbose) {
         tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stupgrade += tupgrade;
      }

      // don't check error rate every loop
      docheck = ((iloop==0) ||((iloop+1)%imod==0));

      if (docheck) {
         // draw progressbar only if not debug
         if (!isVerbose)
            timer.DrawProgressBar(iloop);
         fNTNuval = Double_t(iloop)*fGDPathStep;
         fNTRisk = 0.0;

         // check risk evolution

         if (isDebug) FillCoefficients();
         fNTCoefRad = fRuleEnsemble->CoefficientRadius();

         // calculate risk
         t0 = clock();
         fNTRisk = RiskPath();
         trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         strisk += trisk;
         //
         // Check for an increase in risk.
         // Such an increase would imply that the regularization is too small.
         // Stop the iteration if this happens.
         //
         if (fNTRisk>=rprev) {
            if (fNTRisk>rprev) {
               nbadrisk++;
               Log() << "Risk(i+1)>=Risk(i) in path" << Endl;
               riskFlat=(nbadrisk>3);
               if (riskFlat) {
                  Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
                  Log() << "--- STOPPING MINIMISATION ---" << Endl;
                  Log() << "This may be OK if the minimum is already found" << Endl;
               }
            }
         }
         rprev = fNTRisk;
         //
         // Estimate the error rate using cross validation.
         // Well, not quite full cross validation since we only
         // use ONE model.
         //
         if (isVerbose) t0 = clock();
         fNTErrorRate = 0;

         // Check error rate
         Double_t errroc;//= ErrorRateRoc();
         Double_t riskPerf = RiskPerf();
         // Double_t optimism = Optimism();
         //
         errroc = riskPerf;
         //
         fNTErrorRate = errroc;
         //
         if (isVerbose) {
            tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
            stperf +=tperf;
         }
         //
         // Always take the last min.
         // For each step the risk is reduced.
         //
         if (fNTErrorRate<=errmin) {
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
         //
         // check slope of last couple of points
         //
         if (valx.size()==npreg) {
            valx.erase(valx.begin());
            valy.erase(valy.begin());
            valxy.erase(valxy.begin());
         }
         valx.push_back(fNTNuval);
         valy.push_back(fNTErrorRate);
         valxy.push_back(fNTErrorRate*fNTNuval);

         gFIRSTORG = kFALSE;

         //
         if (isDebug) fGDNtuple->Fill();
         if (isVerbose) {
            Log() << kVERBOSE << "ParamsIRE : "
                  << std::setw(10)
                  << Form("%8d",iloop+1) << " "
                  << Form("%4.4f",fNTRisk) << " "
                  << Form("%4.4f",riskPerf) << " "
                  << Form("%4.4f",fNTRisk+riskPerf) << " "
                  // << Form("%4.4f",fsigave+fbkgave) << " "
                  // << Form("%4.4f",fsigave) << " "
                  // << Form("%4.4f",fsigrms) << " "
                  // << Form("%4.4f",fbkgave) << " "
                  // << Form("%4.4f",fbkgrms) << " "
                  // << Form("%4.4f",fRuleEnsemble->CoefficientRadius())
                  << Endl;
         }
      }
      iloop++;
      // Stop iteration under various conditions
      // * The condition R(i+1)<R(i) is no longer true (when the implicit regularization is too weak)
      // * If the current error estimate is > factor*errmin (factor = 1.1)
      // * We have reached the last step...
      Bool_t endOfLoop = (iloop==fGDNPathSteps);
      if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
         if (riskFlat) {
            stopCondition = 1;
         }
         else if (endOfLoop) {
            stopCondition = 2;
         }
         if (indMin<0) {
            Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless;)" << Endl;
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         found = kTRUE;
      }
      done = (found);
   }
   Log() << Endl;
   Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << " " << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;
   Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
   Log() << kINFO << "Reason for ending loop: ";
   switch (stopCondition) {
   case 0:
      Log() << kINFO << "clear minimum found";
      break;
   case 1:
      Log() << kINFO << "chaotic behaviour of risk";
      break;
   case 2:
      Log() << kINFO << "end of loop reached";
      break;
   default:
      Log() << kINFO << "unknown!";
      break;
   }
   Log() << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;

   // check if early minima - might be an indication of too large stepsize
   if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
      Log() << kWARNING << "Reached minimum early in the search" << Endl;
      Log() << "Check results and maybe decrease GDStep size" << Endl;
   }
   //
   // quick check of the sign of the slope for the last npreg points
   //
   Double_t sumx  = std::accumulate( valx.begin(), valx.end(), Double_t() );
   Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
   Double_t sumy  = std::accumulate( valy.begin(), valy.end(), Double_t() );
   Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
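   // Note: "slope" is N*sum(x*y) - sum(x)*sum(y), the (unnormalised)
   // numerator of the least-squares slope over the last npreg points;
   // only its sign is used below, so the normalisation does not matter.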
   if (slope<0) {
      Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
      Log() << kINFO << "Increase the number of steps (GDNSteps)." << Endl;
   }
   //
   // set coefficients
   //
   if (found) {
      fRuleEnsemble->SetCoefficients( coefsMin );
      fRuleEnsemble->SetLinCoefficients( lincoefsMin );
      fRuleEnsemble->SetOffset( offsetMin );
   }
   else {
      Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
   }

   //
   // print timing info (VERBOSE mode)
   //
   if (isVerbose) {
      Double_t stloop = strisk + stupgrade + stgradvec + stperf;
      Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
      Log() << kVERBOSE << "   gradvec = " << 1000*stgradvec/iloop << Endl;
      Log() << kVERBOSE << "   upgrade = " << 1000*stupgrade/iloop << Endl;
      Log() << kVERBOSE << "   risk    = " << 1000*strisk/iloop << Endl;
      Log() << kVERBOSE << "   perf    = " << 1000*stperf/iloop << Endl;
      Log() << kVERBOSE << "   loop    = " << 1000*stloop/iloop << Endl;
      //
      Log() << kVERBOSE << "   GDInit     = " << 1000*gGDInit/iloop << Endl;
      Log() << kVERBOSE << "   GDPtr      = " << 1000*gGDPtr/iloop << Endl;
      Log() << kVERBOSE << "   GDEval     = " << 1000*gGDEval/iloop << Endl;
      Log() << kVERBOSE << "   GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
      Log() << kVERBOSE << "   GDNorm     = " << 1000*gGDNorm/iloop << Endl;
      Log() << kVERBOSE << "   GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
      Log() << kVERBOSE << "   GDLinLoop  = " << 1000*gGDLinLoop/iloop << Endl;
   }
   //
   // write ntuple (DEBUG)
   if (isDebug) fGDNtuple->Write();
}

////////////////////////////////////////////////////////////////////////////////
/// helper function to store the rule coefficients in local arrays

void TMVA::RuleFitParams::FillCoefficients()
{
   fNTOffset = fRuleEnsemble->GetOffset();
   //
   for (UInt_t i=0; i<fNRules; i++) {
      fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates F* (optimum scoring function) for all events for the given sets.
/// The result is used in ErrorRateReg().
/// --- NOT USED ---
///

void TMVA::RuleFitParams::CalcFStar()
{
   Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   fFstar.clear();
   std::vector<Double_t> fstarSorted;
   Double_t fstarVal;
   // loop over all events and estimate F* for each event
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      fstarVal = fRuleEnsemble->FStar(e);
      fFstar.push_back(fstarVal);
      fstarSorted.push_back(fstarVal);
      if (TMath::IsNaN(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
   }
   // sort F* and find the median
   // (the odd/even branches were swapped in the original; fixed here)
   std::sort( fstarSorted.begin(), fstarSorted.end() );
   UInt_t ind = neve/2;
   if (neve&1) { // odd number of events - take the central value
      fFstarMedian = fstarSorted[ind];
   }
   else { // even - average the two central values
      fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// implementation of eq. 7.17 in Hastie, Tibshirani & Friedman book;
/// this is the covariance between the estimated response yhat and the
/// true value y.
/// NOT REALLY SURE IF THIS IS CORRECT!
/// --- THIS IS NOT USED ---
///
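/// In formula form (a transcription of the code below; weights are
/// normalised so that \f$\sum_i w_i = 1\f$):
/// \f$ \omega = 2\,\frac{\overline{\hat{y}y} - \bar{\hat{y}}\,\bar{y}}{1 - \sum_i w_i^2} \f$
///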

Double_t TMVA::RuleFitParams::Optimism()
{
   Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumy=0;
   Double_t sumyhat=0;
   Double_t sumyhaty=0;
   Double_t sumw2=0;
   Double_t sumw=0;
   Double_t yhat;
   Double_t y;
   Double_t w;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      yhat = fRuleEnsemble->EvalEvent(i); // evaluated using the model
      y    = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0); // the truth
      w    = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf; // the weight, reweighted such that sum=1
      sumy     += w*y;
      sumyhat  += w*yhat;
      sumyhaty += w*yhat*y;
      sumw2    += w*w;
      sumw     += w;
   }
   Double_t div = 1.0-sumw2;
   Double_t cov = sumyhaty - sumyhat*sumy;
   return 2.0*cov/div;
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// This code is pretty messy at the moment.
/// Cleanup is needed.
/// --- NOT USED ---
///

Double_t TMVA::RuleFitParams::ErrorRateReg()
{
   Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
   }
   if (fFstar.size()!=neve) {
      Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
            << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
   }
   //
   Double_t sF;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdf = 0;
   Double_t sumdfmed = 0;
   //
   // A bit messy here.
   // I believe the binary error classification is appropriate here.
   // The problem is stability.
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // scaled abs error, eq 20 in RuleFit paper
      sumdf    += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
      sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
   }
   // scaled abs error, eq 20
   // This error (df) is large - need to think about how to compensate...
   //
   return sumdf/sumdfmed;
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It uses a binary estimate of (y-F*(x)):
/// (y-F*(x)) = (Num of events where sign(F)!=sign(y))/Neve
/// y = {+1 if event is signal, -1 otherwise}
/// --- NOT USED ---
///

Double_t TMVA::RuleFitParams::ErrorRateBin()
{
   Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdfbin = 0;
   Double_t dneve = Double_t(neve);
   Int_t signF, signy;
   Double_t sF;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // Double_t sFstar = fRuleEnsemble->FStar(e); // THIS CAN BE CALCULATED ONCE!
      signF = (sF>0 ? +1:-1);
      // signy = (sFStar>0 ? +1:-1);
      signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
      sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
   }
   Double_t f = sumdfbin/dneve;
   // Double_t df = f*TMath::Sqrt((1.0/sumdfbin) + (1.0/dneve));
   return f;
}

////////////////////////////////////////////////////////////////////////////////

Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
                                               std::vector<Double_t> & sFbkg )
{
   //
   // Estimates the error rate with the current set of parameters.
   // It calculates the area under the bkg rejection vs signal efficiency curve.
   // The value returned is 1-area.
   //
   std::sort(sFsig.begin(), sFsig.end());
   std::sort(sFbkg.begin(), sFbkg.end());
   const Double_t minsig = sFsig.front();
   const Double_t minbkg = sFbkg.front();
   const Double_t maxsig = sFsig.back();
   const Double_t maxbkg = sFbkg.back();
   const Double_t minf = std::min(minsig,minbkg);
   const Double_t maxf = std::max(maxsig,maxbkg);
   const Int_t    nsig = Int_t(sFsig.size());
   const Int_t    nbkg = Int_t(sFbkg.size());
   const Int_t    np   = std::min((nsig+nbkg)/4,50);
   const Double_t df   = (maxf-minf)/(np-1);
   //
   // calculate area under rejection/efficiency curve
   //
   Double_t fcut;
   std::vector<Double_t>::const_iterator indit;
   Int_t nrbkg;
   Int_t nesig;
   Int_t pnesig=0;
   Double_t rejb=0;
   Double_t effs=1.0;
   Double_t prejb=0;
   Double_t peffs=1.0;
   // Double_t drejb;
   Double_t deffs;
   Double_t area=0;
   Int_t npok=0;
   //
   // loop over range of F [minf,maxf]
   //
   for (Int_t i=0; i<np; i++) {
      fcut = minf + df*Double_t(i);
      indit = std::find_if( sFsig.begin(), sFsig.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
      nesig = sFsig.end()-indit; // number of sig accepted with F>cut
      if (TMath::Abs(pnesig-nesig)>0) {
         npok++;
         indit = std::find_if( sFbkg.begin(), sFbkg.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
         nrbkg = indit-sFbkg.begin(); // number of bkg rejected with F>cut
         rejb = Double_t(nrbkg)/Double_t(nbkg);
         effs = Double_t(nesig)/Double_t(nsig);
         //
         // drejb = rejb-prejb;
         deffs = effs-peffs;
         area += 0.5*TMath::Abs(deffs)*(rejb+prejb); // trapezoid
         prejb = rejb;
         peffs = effs;
      }
      pnesig = nesig;
   }
   area += 0.5*(1+rejb)*effs; // extrapolate to the end point

   return (1.0-area);
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
/// This works but is less efficient than calculating the Risk using RiskPerf().
///

Double_t TMVA::RuleFitParams::ErrorRateRoc()
{
   Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sF;
   //
   std::vector<Double_t> sFsig;
   std::vector<Double_t> sFbkg;
   Double_t sumfsig=0;
   Double_t sumfbkg=0;
   Double_t sumf2sig=0;
   Double_t sumf2bkg=0;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent(i);// * fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
         sFsig.push_back(sF);
         sumfsig  +=sF;
         sumf2sig +=sF*sF;
      }
      else {
         sFbkg.push_back(sF);
         sumfbkg  +=sF;
         sumf2bkg +=sF*sF;
      }
   }
   fsigave = sumfsig/sFsig.size();
   fbkgave = sumfbkg/sFbkg.size();
   fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
   fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
   //
   return ErrorRateRocRaw( sFsig, sFbkg );
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
///
/// See comment under ErrorRateRoc().
///

void TMVA::RuleFitParams::ErrorRateRocTst()
{
   Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   // std::vector<Double_t> sF;
   Double_t sF;
   std::vector< std::vector<Double_t> > sFsig;
   std::vector< std::vector<Double_t> > sFbkg;
   //
   sFsig.resize( fGDNTau );
   sFbkg.resize( fGDNTau );
   // sF.resize( fGDNTau );

   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *(*events)[i], fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
            sFsig[itau].push_back(sF);
         }
         else {
            sFbkg[itau].push_back(sF);
         }
      }
   }
   Double_t err;

   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
      fGDErrTst[itau] = err;
   }
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters
/// using the <Perf> subsample.
/// Returns the tau index giving the lowest error.
///

UInt_t TMVA::RuleFitParams::RiskPerfTst()
{
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<RiskPerfTst> Invalid start/end indices!" << Endl;
      return 0;
   }
   //
   Double_t sumx  = 0;
   Double_t sumx2 = 0;
   Double_t maxx  = -100.0;
   Double_t minx  = 1e30;
   UInt_t itaumin = 0;
   UInt_t nok=0;
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         nok++;
         fGDErrTst[itau] = RiskPerf(itau);
         sumx  += fGDErrTst[itau];
         sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
         if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
         if (fGDErrTst[itau]<minx) {
            minx=fGDErrTst[itau];
            itaumin = itau;
         }
      }
   }
   Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
   Double_t maxacc = minx+sigx;
   //
   if (nok>0) {
      nok = 0;
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         if (fGDErrTstOK[itau]) {
            if (fGDErrTst[itau] > maxacc) {
               fGDErrTstOK[itau] = kFALSE;
            }
            else {
               nok++;
            }
         }
      }
   }
   fGDNTauTstOK = nok;
   Log() << kVERBOSE << "TAU: "
         << itaumin << " "
         << nok << " "
         << minx << " "
         << maxx << " "
         << sigx << Endl;
   //
   return itaumin;
}

////////////////////////////////////////////////////////////////////////////////
/// make test gradient vector for all tau;
/// same algorithm as MakeGradientVector()
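///
/// The per-coefficient gradient used here (our reading of eq 35 in ref 1):
/// \f$ g_k = \frac{2}{N_{eff}} \sum_{i\,:\,|F(x_i)|<1} w_i\,(y_i - F(x_i))\,f_k(x_i) \f$,
/// where \f$ f_k(x_i) \f$ is the response of rule/linear term k for event i.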

void TMVA::RuleFitParams::MakeTstGradientVector()
{
   UInt_t neve = fPathIdx2-fPathIdx1+1; // sign fixed; was fPathIdx1-fPathIdx2+1, which underflows
   if (neve<1) {
      Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         for (UInt_t ir=0; ir<fNRules; ir++) {
            fGradVecTst[itau][ir]=0;
         }
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLinTst[itau][il]=0;
         }
      }
   }
   //
   // Double_t val; // temp store
   Double_t sF; // score function value
   Double_t r;  // eq 35, ref 1
   Double_t y;  // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   // Loop over all events
   //
   UInt_t nsfok=0;
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];
      UInt_t nrules=0;
      if (fRuleEnsemble->DoRules()) {
         eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
         nrules = (*eventRuleMap).size();
      }
      for (UInt_t itau=0; itau<fGDNTau; itau++) { // loop over tau
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *e, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fGDErrTstOK[itau]) {
            sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
            if (TMath::Abs(sF)<1.0) {
               nsfok++;
               r = 0;
               y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
               r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
               // rule gradient vector
               for (UInt_t ir=0; ir<nrules; ir++) {
                  rind = (*eventRuleMap)[ir];
                  fGradVecTst[itau][rind] += r;
               }
               // linear terms
               for (UInt_t il=0; il<fNLinear; il++) {
                  fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
               }
            } // if (TMath::Abs(F)<xxx)
         }
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
/// for all taus. TODO: do not need index range!
///

void TMVA::RuleFitParams::UpdateTstCoefficients()
{
   Double_t maxr, maxl, cthresh, val;
   // loop over all taus
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         // find max gradient
         maxr = ( (fNRules>0 ?
                   TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
         maxl = ( (fNLinear>0 ?
                   TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );

         // Use the maximum as a threshold
         Double_t maxv = (maxr>maxl ? maxr:maxl);
         cthresh = maxv * fGDTauVec[itau];

         // Add to offset, if gradient is large enough:
         // Loop over the gradient vector and move to next set of coefficients
         // size of GradVec (and GradVecLin) should be 0 if learner is disabled
         //
         // Step-size is divided by 10 when looking for the best path.
         //
         if (maxv>0) {
            const Double_t stepScale=1.0;
            for (UInt_t i=0; i<fNRules; i++) {
               val = fGradVecTst[itau][i];

               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
               }
            }
            // Loop over the gradient vector for the linear part and move to next set of coefficients
            for (UInt_t i=0; i<fNLinear; i++) {
               val = fGradVecLinTst[itau][i];
               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
               }
            }
         }
      }
   }
   // set the offset - should be outside the itau loop!
   CalcTstAverageResponse();
}

////////////////////////////////////////////////////////////////////////////////
/// make gradient vector
///

void TMVA::RuleFitParams::MakeGradientVector()
{
   clock_t t0;
   // clock_t t10;
   t0 = clock();
   //
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t ir=0; ir<fNRules; ir++) {
      fGradVec[ir]=0;
   }
   for (UInt_t il=0; il<fNLinear; il++) {
      fGradVecLin[il]=0;
   }
   //
   // Double_t val; // temp store
   Double_t sF; // score function value
   Double_t r;  // eq 35, ref 1
   Double_t y;  // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;

   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];

      // t0 = clock(); //DEB
      sF = fRuleEnsemble->EvalEvent( i ); // should not contain the weight
      // gGDEval += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      if (TMath::Abs(sF)<1.0) {
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
         r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
         // rule gradient vector
         for (UInt_t ir=0; ir<nrules; ir++) {
            rind = (*eventRuleMap)[ir];
            fGradVec[rind] += r;
         }
         // gGDRuleLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
         // linear terms
         // t0 = clock(); //DEB
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
         }
         // gGDLinLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      } // if (TMath::Abs(F)<xxx)
   }
}


////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
///

void TMVA::RuleFitParams::UpdateCoefficients()
{
   Double_t maxr = ( (fRuleEnsemble->DoRules() ?
                      TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
   Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
                      TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
   // Use the maximum as a threshold
   Double_t maxv = (maxr>maxl ? maxr:maxl);
   Double_t cthresh = maxv * fGDTau;

   Double_t useRThresh;
   Double_t useLThresh;
   //
   // Choose thresholds.
   //
   useRThresh = cthresh;
   useLThresh = cthresh;

   Double_t gval, lval, coef, lcoef;

   // Add to offset, if gradient is large enough:
   // Loop over the gradient vector and move to next set of coefficients
   // size of GradVec (and GradVecLin) should be 0 if learner is disabled
   if (maxv>0) {
      for (UInt_t i=0; i<fGradVec.size(); i++) {
         gval = fGradVec[i];
         if (TMath::Abs(gval)>=useRThresh) {
            coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
            fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
         }
      }

      // Loop over the gradient vector for the linear part and move to next set of coefficients
      for (UInt_t i=0; i<fGradVecLin.size(); i++) {
         lval = fGradVecLin[i];
         if (TMath::Abs(lval)>=useLThresh) {
            lcoef = fRuleEnsemble->GetLinCoefficients(i) + fGDPathStep*lval;
            fRuleEnsemble->SetLinCoefficient(i,lcoef);
         }
      }
      // Set the offset
      Double_t offset = CalcAverageResponse();
      fRuleEnsemble->SetOffset( offset );
   }
}

////////////////////////////////////////////////////////////////////////////////
/// calc average response for all test paths - TODO: see comment under CalcAverageResponse();
/// note that 0 offset is used
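///
/// In formula form: \f$ ofs_\tau = -\sum_s b_s^\tau \bar{x}_s - \sum_r a_r^\tau \bar{r}_r \f$,
/// i.e. the offset that makes the average model response vanish on the
/// path sample.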

void TMVA::RuleFitParams::CalcTstAverageResponse()
{
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         fGDOfsTst[itau] = 0;
         for (UInt_t s=0; s<fNLinear; s++) {
            fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
         }
         for (UInt_t r=0; r<fNRules; r++) {
            fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
         }
      }
   }
   //
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average response - TODO: rewrite bad dependency on EvaluateAverage()!
///
/// note that 0 offset is used

Double_t TMVA::RuleFitParams::CalcAverageResponse()
{
   Double_t ofs = 0;
   for (UInt_t s=0; s<fNLinear; s++) {
      ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
   }
   for (UInt_t r=0; r<fNRules; r++) {
      ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
   }
   return ofs;
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average truth
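///
/// In formula form: \f$ \bar{y} = \frac{1}{N_{eff}} \sum_i w_i\,y_i \f$,
/// with \f$ y_i = +1 \f$ for signal and \f$ y_i = -1 \f$ for background.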

Double_t TMVA::RuleFitParams::CalcAverageTruth()
{
   if (fPathIdx2<=fPathIdx1) {
      Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
      return 0;
   }
   Double_t sum=0;
   Double_t ensig=0;
   Double_t enbkg=0;
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      Double_t ew = fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
      else                                                                 enbkg += ew;
      sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
   }
   Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;

   return sum/fNEveEffPath;
}

//_______________________________________________________________________

Int_t TMVA::RuleFitParams::Type( const Event * e ) const {
   return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
}


////////////////////////////////////////////////////////////////////////////////

void TMVA::RuleFitParams::SetMsgType( EMsgType t ) {
   fLogger->SetMinType(t);
}