Logo ROOT   6.12/07
Reference Guide
TMVAClassification_RuleFit.class.C
Go to the documentation of this file.
1 // Class: ReadRuleFit
2 // Automatically generated by MethodBase::MakeClass
3 //
4 
5 /* configuration options =====================================================
6 
7 #GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-
8 
9 Method : RuleFit::RuleFit
10 TMVA Release : 4.2.1 [262657]
11 ROOT Release : 6.12/07 [396295]
12 Creator : sftnight
13 Date : Sat Sep 29 23:25:19 2018
14 Host : Linux ec-ubuntu-14-04-x86-64-2 3.13.0-157-generic #207-Ubuntu SMP Mon Aug 20 16:44:59 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
15 Dir : /mnt/build/workspace/root-makedoc-v612/rootspi/rdoc/src/v6-12-00-patches/documentation/doxygen
16 Training events: 2000
17 Analysis type : [Classification]
18 
19 
20 #OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-
21 
22 # Set by User:
23 V: "False" [Verbose output (short form of "VerbosityLevel" below - overrides the latter one)]
24 H: "True" [Print method-specific help message]
25 GDTau: "-1.000000e+00" [Gradient-directed (GD) path: default fit cut-off]
26 GDTauPrec: "1.000000e-02" [GD path: precision of tau]
27 GDStep: "1.000000e-02" [GD path: step size]
28 GDNSteps: "10000" [GD path: number of steps]
29 GDErrScale: "1.020000e+00" [Stop scan when error > scale*errmin]
30 fEventsMin: "1.000000e-02" [Minimum fraction of events in a splittable node]
31 fEventsMax: "5.000000e-01" [Maximum fraction of events in a splittable node]
32 nTrees: "20" [Number of trees in forest.]
33 RuleMinDist: "1.000000e-03" [Minimum distance between rules]
34 MinImp: "1.000000e-03" [Minimum rule importance accepted]
35 Model: "modrulelinear" [Model to be used]
36 RuleFitModule: "rftmva" [Which RuleFit module to use]
37 # Default:
38 VerbosityLevel: "Default" [Verbosity level]
39 VarTransform: "None" [List of variable transformations performed before training, e.g., "D_Background,P_Signal,G,N_AllClasses" for: "Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)"]
40 CreateMVAPdfs: "False" [Create PDFs for classifier outputs (signal and background)]
41 IgnoreNegWeightsInTraining: "False" [Events with negative weights are ignored in the training (but are included for testing and performance evaluation)]
42 LinQuantile: "2.500000e-02" [Quantile of linear terms (removes outliers)]
43 GDPathEveFrac: "5.000000e-01" [Fraction of events used for the path search]
44 GDValidEveFrac: "5.000000e-01" [Fraction of events used for the validation]
45 ForestType: "adaboost" [Method to use for forest generation (AdaBoost or RandomForest)]
46 RFWorkDir: "./rulefit" [Friedman's RuleFit module (RFF): working dir]
47 RFNrules: "2000" [RFF: Maximum number of rules]
48 RFNendnodes: "4" [RFF: Average number of end nodes]
49 ##
50 
51 
52 #VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-
53 
54 NVar 4
55 var1+var2 myvar1 myvar1 myvar1 'F' [-8.14423561096,7.26972866058]
56 var1-var2 myvar2 myvar2 Expression 2 'F' [-3.96643972397,4.0258936882]
57 var3 var3 var3 Variable 3 units 'F' [-5.03730010986,4.27845287323]
58 var4 var4 var4 Variable 4 units 'F' [-5.95050764084,4.64035463333]
59 NSpec 2
60 var1*2 spec1 spec1 Spectator 1 units 'F' [-9.91655540466,8.67800140381]
61 var1*3 spec2 spec2 Spectator 2 units 'F' [-14.874833107,13.0170021057]
62 
63 
64 ============================================================================ */
65 
#include <algorithm>
#include <array>
#include <cmath>
#include <iostream>
#include <string>
#include <vector>
71 
72 #ifndef IClassifierReader__def
73 #define IClassifierReader__def
74 
// Abstract reader interface: a pure-virtual classifier response plus a
// "status is clean" flag that derived classes may clear.
class IClassifierReader {

 public:

   // the status flag starts out clean (see the member initializer below)
   IClassifierReader() = default;
   virtual ~IClassifierReader() = default;

   // classifier response for one set of input values
   virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;

   // true as long as no derived class has cleared the status flag
   bool IsStatusClean() const { return fStatusIsClean; }

 protected:

   // writable by derived classes
   bool fStatusIsClean = true;
};
93 
94 #endif
95 
96 class ReadRuleFit : public IClassifierReader {
97 
98  public:
99 
100  // constructor
101  ReadRuleFit( std::vector<std::string>& theInputVars )
102  : IClassifierReader(),
103  fClassName( "ReadRuleFit" ),
104  fNvars( 4 ),
105  fIsNormalised( false )
106  {
107  // the training input variables
108  const char* inputVars[] = { "var1+var2", "var1-var2", "var3", "var4" };
109 
110  // sanity checks
111  if (theInputVars.size() <= 0) {
112  std::cout << "Problem in class \"" << fClassName << "\": empty input vector" << std::endl;
113  fStatusIsClean = false;
114  }
115 
116  if (theInputVars.size() != fNvars) {
117  std::cout << "Problem in class \"" << fClassName << "\": mismatch in number of input values: "
118  << theInputVars.size() << " != " << fNvars << std::endl;
119  fStatusIsClean = false;
120  }
121 
122  // validate input variables
123  for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {
124  if (theInputVars[ivar] != inputVars[ivar]) {
125  std::cout << "Problem in class \"" << fClassName << "\": mismatch in input variable names" << std::endl
126  << " for variable [" << ivar << "]: " << theInputVars[ivar].c_str() << " != " << inputVars[ivar] << std::endl;
127  fStatusIsClean = false;
128  }
129  }
130 
131  // initialize min and max vectors (for normalisation)
132  fVmin[0] = 0;
133  fVmax[0] = 0;
134  fVmin[1] = 0;
135  fVmax[1] = 0;
136  fVmin[2] = 0;
137  fVmax[2] = 0;
138  fVmin[3] = 0;
139  fVmax[3] = 0;
140 
141  // initialize input variable types
142  fType[0] = 'F';
143  fType[1] = 'F';
144  fType[2] = 'F';
145  fType[3] = 'F';
146 
147  // initialize constants
148  Initialize();
149 
150  }
151 
152  // destructor
153  virtual ~ReadRuleFit() {
154  Clear(); // method-specific
155  }
156 
157  // the classifier response
158  // "inputValues" is a vector of input values in the same order as the
159  // variables given to the constructor
160  double GetMvaValue( const std::vector<double>& inputValues ) const;
161 
162  private:
163 
164  // method-specific destructor
165  void Clear();
166 
167  // common member variables
168  const char* fClassName;
169 
170  const size_t fNvars;
171  size_t GetNvar() const { return fNvars; }
172  char GetType( int ivar ) const { return fType[ivar]; }
173 
174  // normalisation of input variables
175  const bool fIsNormalised;
176  bool IsNormalised() const { return fIsNormalised; }
177  double fVmin[4];
178  double fVmax[4];
179  double NormVariable( double x, double xmin, double xmax ) const {
180  // normalise to output range: [-1, 1]
181  return 2*(x - xmin)/(xmax - xmin) - 1.0;
182  }
183 
184  // type of input variable: 'F' or 'I'
185  char fType[4];
186 
187  // initialize internal variables
188  void Initialize();
189  double GetMvaValue__( const std::vector<double>& inputValues ) const;
190 
191  // private members (method specific)
192  // not implemented for class: "ReadRuleFit"
193 };
195 void ReadRuleFit::Clear(){}
// Computes the RuleFit response as a running sum "rval":
//   - it starts from a constant offset,
//   - each rule adds its coefficient when all of its cuts on inputValues hold,
//   - each linear term adds its coefficient times the input value, after the
//     value has been limited to a fixed [lower, upper] range.
// Reads inputValues[0] .. inputValues[3] without any size check, so the
// caller must pass at least four values.
196 double ReadRuleFit::GetMvaValue__( const std::vector<double>& inputValues ) const {
197  double rval=1.821665671;
198  //
199  // here follows all rules ordered in importance (most important first)
200  // at the end of each line, the relative importance of the rule is given
201  //
 // every rule is an AND of strict "<" cuts; rules are independent of each
 // other, so any number of them can contribute for the same input
202  if ((inputValues[2]<0.2859873176)&&(inputValues[3]<-0.6145658493)) rval+=-0.5383069125; // importance = 0.523
203  if ((0.6637439728<inputValues[0])&&(inputValues[3]<1.852592468)) rval+=-0.365472615; // importance = 0.354
204  if ((-0.8042526245<inputValues[0])) rval+=-0.3487120504; // importance = 0.352
205  if ((inputValues[2]<0.2859873176)&&(-0.6145658493<inputValues[3])) rval+=0.3265835596; // importance = 0.325
206  if ((inputValues[0]<1.397742271)&&(-0.7250279784<inputValues[1])&&(inputValues[3]<-0.6721984744)) rval+=-0.3591921639; // importance = 0.310
207  if ((inputValues[0]<2.865738869)&&(inputValues[3]<1.206904054)) rval+=-0.3235634573; // importance = 0.283
208  if ((inputValues[3]<1.110067248)) rval+=-0.3014417789; // importance = 0.274
209  if ((inputValues[0]<1.361399651)&&(inputValues[1]<0.2200206369)&&(inputValues[3]<-0.411757946)) rval+=-0.2635351483; // importance = 0.227
210  if ((0.2200206369<inputValues[1])) rval+=-0.199620194; // importance = 0.213
211  if ((-0.07025432587<inputValues[0])&&(0.3575037718<inputValues[1])) rval+=-0.2338918124; // importance = 0.199
212  if ((inputValues[2]<-1.044834495)) rval+=-0.2463489969; // importance = 0.195
213  if ((-1.538250923<inputValues[0])&&(inputValues[0]<1.266595125)&&(inputValues[3]<0.3321121633)) rval+=-0.1879163686; // importance = 0.195
214  if ((inputValues[1]<0.07876376063)&&(inputValues[3]<0.1014136598)) rval+=-0.2062533705; // importance = 0.194
215  if ((-0.6012272835<inputValues[2])&&(0.9022580981<inputValues[3])) rval+=0.1703085413; // importance = 0.165
216  if ((inputValues[1]<0.2200206369)) rval+=-0.1519403671; // importance = 0.162
217  if ((-0.4926698804<inputValues[2])&&(inputValues[3]<0.3393571973)) rval+=-0.1698124189; // importance = 0.160
218  if ((inputValues[0]<1.361399651)&&(inputValues[1]<0.2200206369)) rval+=0.1412135028; // importance = 0.151
219  if ((-0.737119019<inputValues[1])&&(inputValues[1]<0.02489273809)&&(inputValues[3]<0.605740428)) rval+=-0.1831062971; // importance = 0.149
220  if ((inputValues[0]<2.487332582)&&(-0.4926698804<inputValues[2])&&(inputValues[3]<1.069083929)) rval+=-0.1373525789; // importance = 0.147
221  if ((0.2200206369<inputValues[1])&&(inputValues[3]<0.7960036993)) rval+=-0.1430605103; // importance = 0.141
222  if ((0.2200206369<inputValues[1])&&(-1.044834495<inputValues[2])&&(inputValues[3]<0.196363762)) rval+=-0.1744597681; // importance = 0.133
223  if ((0.07876376063<inputValues[1])&&(0.2859873176<inputValues[2])) rval+=-0.1581814857; // importance = 0.131
224  if ((-1.856152892<inputValues[0])&&(inputValues[0]<0.6637439728)&&(0.2456704378<inputValues[3])) rval+=0.1660814983; // importance = 0.123
225  if ((inputValues[0]<-0.3903895915)&&(-0.1605666727<inputValues[1])&&(inputValues[2]<1.173201919)) rval+=0.1320495424; // importance = 0.118
226  if ((-0.876881063<inputValues[0])&&(inputValues[1]<-0.1346641928)&&(0.605740428<inputValues[3])) rval+=0.1373748234; // importance = 0.110
227  if ((inputValues[0]<2.865738869)&&(1.206904054<inputValues[3])) rval+=0.1301838684; // importance = 0.102
228  if ((inputValues[0]<2.865738869)) rval+=-0.1921057245; // importance = 0.092
229  if ((inputValues[0]<0.6637439728)&&(-0.237038821<inputValues[1])&&(0.2456704378<inputValues[3])) rval+=0.1434630503; // importance = 0.087
230  if ((-0.07025432587<inputValues[0])&&(inputValues[1]<0.3575037718)) rval+=-0.08493248301; // importance = 0.086
231  if ((-1.044834495<inputValues[2])) rval+=-0.1051437122; // importance = 0.083
232  if ((0.02489273809<inputValues[1])&&(inputValues[2]<0.7852135301)&&(inputValues[3]<0.605740428)) rval+=-0.08363995833; // importance = 0.082
233  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])&&(inputValues[3]<1.066591382)) rval+=-0.09249249872; // importance = 0.079
234  if ((inputValues[0]<-0.3790929615)&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])) rval+=0.09272184004; // importance = 0.071
235  if ((0.2200206369<inputValues[1])&&(inputValues[1]<0.774100244)&&(-1.044834495<inputValues[2])&&(inputValues[3]<0.196363762)) rval+=-0.1208843459; // importance = 0.069
236  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])&&(inputValues[2]<1.375104785)&&(inputValues[3]<1.066591382)) rval+=-0.07764621172; // importance = 0.066
237  if ((-0.3903895915<inputValues[0])&&(-0.1605666727<inputValues[1])&&(inputValues[2]<1.173201919)) rval+=-0.0690361005; // importance = 0.066
238  if ((0.6637439728<inputValues[0])&&(1.386405587<inputValues[3])) rval+=-0.08141573768; // importance = 0.065
239  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])) rval+=-0.06193442764; // importance = 0.063
240  if ((inputValues[0]<1.397742271)&&(-0.7250279784<inputValues[1])) rval+=0.05855968621; // importance = 0.062
241  if ((-0.4885016978<inputValues[0])&&(-1.044834495<inputValues[2])&&(1.261463404<inputValues[3])) rval+=-0.07066745959; // importance = 0.061
242  if ((-0.5954368114<inputValues[1])&&(inputValues[2]<-0.6012272835)) rval+=-0.06641189659; // importance = 0.058
243  if ((-0.876881063<inputValues[0])&&(-0.1346641928<inputValues[1])&&(0.605740428<inputValues[3])) rval+=-0.0589214863; // importance = 0.051
244  if ((-0.5954368114<inputValues[1])&&(inputValues[1]<-0.06197149307)&&(inputValues[2]<0.2926391363)) rval+=-0.05675071978; // importance = 0.039
245  if ((0.2200206369<inputValues[1])&&(0.7960036993<inputValues[3])) rval+=-0.05407344724; // importance = 0.039
246  if ((inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])) rval+=0.03043669572; // importance = 0.033
247  if ((0.2200206369<inputValues[1])&&(-1.044834495<inputValues[2])&&(0.196363762<inputValues[3])) rval+=0.03522315626; // importance = 0.031
248  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])&&(1.066591382<inputValues[3])) rval+=0.03174133575; // importance = 0.024
249  if ((-1.810295105<inputValues[2])&&(inputValues[2]<-1.044834495)) rval+=-0.02586694411; // importance = 0.017
250  if ((inputValues[0]<0.6637439728)&&(inputValues[1]<-0.237038821)&&(0.2456704378<inputValues[3])) rval+=0.02297392817; // importance = 0.011
251  if ((-1.411566615<inputValues[3])) rval+=-0.01612698015; // importance = 0.011
252  if ((inputValues[1]<-0.1605666727)&&(inputValues[2]<1.173201919)) rval+=-0.008633417105; // importance = 0.009
253  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])&&(inputValues[2]<0.340533644)&&(1.066591382<inputValues[3])) rval+=0.02724536541; // importance = 0.005
254  //
255  // here follows all linear terms
256  // at the end of each line, the relative importance of the term is given
257  //
 // std::max applies the lower bound and std::min the upper bound, so values
 // outside [lower, upper] contribute as if they sat exactly on the bound
258  rval+=-0.230579859*std::min( double(3.424694538), std::max( double(inputValues[0]), double(-3.440958977))); // importance = 0.902
259  rval+=-0.03918419635*std::min( double(2.07433629), std::max( double(inputValues[1]), double(-2.206938982))); // importance = 0.096
260  rval+=0.007237378388*std::min( double(2.088175535), std::max( double(inputValues[2]), double(-2.172396898))); // importance = 0.017
261  rval+=0.3519137031*std::min( double(2.604138613), std::max( double(inputValues[3]), double(-2.361749649))); // importance = 1.000
262  return rval;
263 }
264  inline double ReadRuleFit::GetMvaValue( const std::vector<double>& inputValues ) const
265  {
266  // classifier response value
267  double retval = 0;
268 
269  // classifier response, sanity check first
270  if (!IsStatusClean()) {
271  std::cout << "Problem in class \"" << fClassName << "\": cannot return classifier response"
272  << " because status is dirty" << std::endl;
273  retval = 0;
274  }
275  else {
276  if (IsNormalised()) {
277  // normalise variables
278  std::vector<double> iV;
279  iV.reserve(inputValues.size());
280  int ivar = 0;
281  for (std::vector<double>::const_iterator varIt = inputValues.begin();
282  varIt != inputValues.end(); varIt++, ivar++) {
283  iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));
284  }
285  retval = GetMvaValue__( iV );
286  }
287  else {
288  retval = GetMvaValue__( inputValues );
289  }
290  }
291 
292  return retval;
293  }
float xmin
Definition: THbookFile.cxx:93
Type GetType(const std::string &Name)
Definition: Systematics.cxx:34
Double_t x[n]
Definition: legend1.C:17
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
float xmax
Definition: THbookFile.cxx:93
PyObject * fType