Logo ROOT   6.10/09
Reference Guide
TMVAClassification_RuleFit.class.C
Go to the documentation of this file.
1 // Class: ReadRuleFit
2 // Automatically generated by MethodBase::MakeClass
3 //
4 
5 /* configuration options =====================================================
6 
7 #GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-
8 
9 Method : RuleFit::RuleFit
10 TMVA Release : 4.2.1 [262657]
11 ROOT Release : 6.10/09 [395785]
12 Creator : sftnight
13 Date : Thu May 31 12:04:30 2018
14 Host : Linux SFT-ubuntu-1710-1 4.13.0-31-generic #34-Ubuntu SMP Fri Jan 19 16:34:46 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
15 Dir : /mnt/build/workspace/root-makedoc-v610/rootspi/rdoc/src/v6-10-00-patches/documentation/doxygen
16 Training events: 2000
17 Analysis type : [Classification]
18 
19 
20 #OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-
21 
22 # Set by User:
23 V: "False" [Verbose output (short form of "VerbosityLevel" below - overrides the latter one)]
24 H: "True" [Print method-specific help message]
25 GDTau: "-1.000000e+00" [Gradient-directed (GD) path: default fit cut-off]
26 GDTauPrec: "1.000000e-02" [GD path: precision of tau]
27 GDStep: "1.000000e-02" [GD path: step size]
28 GDNSteps: "10000" [GD path: number of steps]
29 GDErrScale: "1.020000e+00" [Stop scan when error > scale*errmin]
30 fEventsMin: "1.000000e-02" [Minimum fraction of events in a splittable node]
31 fEventsMax: "5.000000e-01" [Maximum fraction of events in a splittable node]
32 nTrees: "20" [Number of trees in forest.]
33 RuleMinDist: "1.000000e-03" [Minimum distance between rules]
34 MinImp: "1.000000e-03" [Minimum rule importance accepted]
35 Model: "modrulelinear" [Model to be used]
36 RuleFitModule: "rftmva" [Which RuleFit module to use]
37 # Default:
38 VerbosityLevel: "Default" [Verbosity level]
39 VarTransform: "None" [List of variable transformations performed before training, e.g., "D_Background,P_Signal,G,N_AllClasses" for: "Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)"]
40 CreateMVAPdfs: "False" [Create PDFs for classifier outputs (signal and background)]
41 IgnoreNegWeightsInTraining: "False" [Events with negative weights are ignored in the training (but are included for testing and performance evaluation)]
42 LinQuantile: "2.500000e-02" [Quantile of linear terms (removes outliers)]
43 GDPathEveFrac: "5.000000e-01" [Fraction of events used for the path search]
44 GDValidEveFrac: "5.000000e-01" [Fraction of events used for the validation]
45 ForestType: "adaboost" [Method to use for forest generation (AdaBoost or RandomForest)]
46 RFWorkDir: "./rulefit" [Friedman's RuleFit module (RFF): working dir]
47 RFNrules: "2000" [RFF: Maximum number of rules]
48 RFNendnodes: "4" [RFF: Average number of end nodes]
49 ##
50 
51 
52 #VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-
53 
54 NVar 4
55 var1+var2 myvar1 myvar1 myvar1 'F' [-8.14423561096,7.26972866058]
56 var1-var2 myvar2 myvar2 Expression 2 'F' [-3.96643972397,4.0258936882]
57 var3 var3 var3 Variable 3 units 'F' [-5.03730010986,4.27845287323]
58 var4 var4 var4 Variable 4 units 'F' [-5.95050764084,4.64035463333]
59 NSpec 2
60 var1*2 spec1 spec1 Spectator 1 units 'F' [-9.91655540466,8.67800140381]
61 var1*3 spec2 spec2 Spectator 2 units 'F' [-14.874833107,13.0170021057]
62 
63 
64 ============================================================================ */
65 
#include <algorithm>
#include <cmath>
#include <iostream>
#include <string>
#include <vector>
70 
71 #ifndef IClassifierReader__def
72 #define IClassifierReader__def
73 
/// Abstract interface of a standalone classifier reader.
///
/// A concrete reader implements GetMvaValue(); the status flag is kept here
/// so that callers can query it through IsStatusClean().
class IClassifierReader {
public:
   /// A newly constructed reader starts with a clean status.
   IClassifierReader()
      : fStatusIsClean(true)
   {
   }

   virtual ~IClassifierReader()
   {
   }

   /// Classifier response for one set of input values (implemented by the
   /// concrete reader).
   virtual double GetMvaValue(const std::vector<double>& inputValues) const = 0;

   /// Current value of the status flag.
   bool IsStatusClean() const
   {
      return fStatusIsClean;
   }

protected:
   /// Status flag; protected so that derived readers can update it.
   bool fStatusIsClean;
};
92 
93 #endif
94 
95 class ReadRuleFit : public IClassifierReader {
96 
97  public:
98 
99  // constructor
100  ReadRuleFit( std::vector<std::string>& theInputVars )
101  : IClassifierReader(),
102  fClassName( "ReadRuleFit" ),
103  fNvars( 4 ),
104  fIsNormalised( false )
105  {
106  // the training input variables
107  const char* inputVars[] = { "var1+var2", "var1-var2", "var3", "var4" };
108 
109  // sanity checks
110  if (theInputVars.size() <= 0) {
111  std::cout << "Problem in class \"" << fClassName << "\": empty input vector" << std::endl;
112  fStatusIsClean = false;
113  }
114 
115  if (theInputVars.size() != fNvars) {
116  std::cout << "Problem in class \"" << fClassName << "\": mismatch in number of input values: "
117  << theInputVars.size() << " != " << fNvars << std::endl;
118  fStatusIsClean = false;
119  }
120 
121  // validate input variables
122  for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {
123  if (theInputVars[ivar] != inputVars[ivar]) {
124  std::cout << "Problem in class \"" << fClassName << "\": mismatch in input variable names" << std::endl
125  << " for variable [" << ivar << "]: " << theInputVars[ivar].c_str() << " != " << inputVars[ivar] << std::endl;
126  fStatusIsClean = false;
127  }
128  }
129 
130  // initialize min and max vectors (for normalisation)
131  fVmin[0] = 0;
132  fVmax[0] = 0;
133  fVmin[1] = 0;
134  fVmax[1] = 0;
135  fVmin[2] = 0;
136  fVmax[2] = 0;
137  fVmin[3] = 0;
138  fVmax[3] = 0;
139 
140  // initialize input variable types
141  fType[0] = 'F';
142  fType[1] = 'F';
143  fType[2] = 'F';
144  fType[3] = 'F';
145 
146  // initialize constants
147  Initialize();
148 
149  }
150 
151  // destructor
152  virtual ~ReadRuleFit() {
153  Clear(); // method-specific
154  }
155 
156  // the classifier response
157  // "inputValues" is a vector of input values in the same order as the
158  // variables given to the constructor
159  double GetMvaValue( const std::vector<double>& inputValues ) const;
160 
161  private:
162 
163  // method-specific destructor
164  void Clear();
165 
166  // common member variables
167  const char* fClassName;
168 
169  const size_t fNvars;
170  size_t GetNvar() const { return fNvars; }
171  char GetType( int ivar ) const { return fType[ivar]; }
172 
173  // normalisation of input variables
174  const bool fIsNormalised;
175  bool IsNormalised() const { return fIsNormalised; }
176  double fVmin[4];
177  double fVmax[4];
178  double NormVariable( double x, double xmin, double xmax ) const {
179  // normalise to output range: [-1, 1]
180  return 2*(x - xmin)/(xmax - xmin) - 1.0;
181  }
182 
183  // type of input variable: 'F' or 'I'
184  char fType[4];
185 
186  // initialize internal variables
187  void Initialize();
188  double GetMvaValue__( const std::vector<double>& inputValues ) const;
189 
190  // private members (method specific)
191  // not implemented for class: "ReadRuleFit"
192 };
194 void ReadRuleFit::Clear(){}
195 double ReadRuleFit::GetMvaValue__( const std::vector<double>& inputValues ) const {
196  double rval=1.818436747;
197  //
198  // here follows all rules ordered in importance (most important first)
199  // at the end of each line, the relative importance of the rule is given
200  //
201  if ((inputValues[2]<0.2859873176)&&(inputValues[3]<-0.6145658493)) rval+=-0.5379093668; // importance = 0.522
202  if ((0.6637439728<inputValues[0])&&(inputValues[3]<1.852592468)) rval+=-0.3660666206; // importance = 0.354
203  if ((-0.8042526245<inputValues[0])) rval+=-0.3482884165; // importance = 0.352
204  if ((inputValues[2]<0.2859873176)&&(-0.6145658493<inputValues[3])) rval+=0.3266429992; // importance = 0.325
205  if ((inputValues[0]<1.397742271)&&(-0.7250279784<inputValues[1])&&(inputValues[3]<-0.6721984744)) rval+=-0.358918334; // importance = 0.310
206  if ((inputValues[0]<2.865738869)&&(inputValues[3]<1.206904054)) rval+=-0.3232525673; // importance = 0.282
207  if ((inputValues[3]<1.110067248)) rval+=-0.301074141; // importance = 0.274
208  if ((inputValues[0]<1.361399651)&&(inputValues[1]<0.2200206369)&&(inputValues[3]<-0.411757946)) rval+=-0.2633455358; // importance = 0.226
209  if ((0.2200206369<inputValues[1])) rval+=-0.1989356993; // importance = 0.212
210  if ((-0.07025432587<inputValues[0])&&(0.3575037718<inputValues[1])) rval+=-0.2333956588; // importance = 0.198
211  if ((-1.538250923<inputValues[0])&&(inputValues[0]<1.266595125)&&(inputValues[3]<0.3321121633)) rval+=-0.1880227743; // importance = 0.195
212  if ((inputValues[2]<-1.044834495)) rval+=-0.2458634586; // importance = 0.195
213  if ((inputValues[1]<0.07876376063)&&(inputValues[3]<0.1014136598)) rval+=-0.2060729782; // importance = 0.193
214  if ((-0.6012272835<inputValues[2])&&(0.9022580981<inputValues[3])) rval+=0.1708052558; // importance = 0.166
215  if ((inputValues[1]<0.2200206369)) rval+=-0.1516181657; // importance = 0.162
216  if ((-0.4926698804<inputValues[2])&&(inputValues[3]<0.3393571973)) rval+=-0.1697182182; // importance = 0.160
217  if ((inputValues[0]<1.361399651)&&(inputValues[1]<0.2200206369)) rval+=0.1415105291; // importance = 0.151
218  if ((-0.737119019<inputValues[1])&&(inputValues[1]<0.02489273809)&&(inputValues[3]<0.605740428)) rval+=-0.1829364476; // importance = 0.149
219  if ((inputValues[0]<2.487332582)&&(-0.4926698804<inputValues[2])&&(inputValues[3]<1.069083929)) rval+=-0.1373253511; // importance = 0.147
220  if ((0.2200206369<inputValues[1])&&(inputValues[3]<0.7960036993)) rval+=-0.1428094066; // importance = 0.141
221  if ((0.2200206369<inputValues[1])&&(-1.044834495<inputValues[2])&&(inputValues[3]<0.196363762)) rval+=-0.1744087832; // importance = 0.133
222  if ((0.07876376063<inputValues[1])&&(0.2859873176<inputValues[2])) rval+=-0.1577677638; // importance = 0.130
223  if ((-1.856152892<inputValues[0])&&(inputValues[0]<0.6637439728)&&(0.2456704378<inputValues[3])) rval+=0.1659540232; // importance = 0.123
224  if ((inputValues[0]<-0.3903895915)&&(-0.1605666727<inputValues[1])&&(inputValues[2]<1.173201919)) rval+=0.1323516278; // importance = 0.118
225  if ((-0.876881063<inputValues[0])&&(inputValues[1]<-0.1346641928)&&(0.605740428<inputValues[3])) rval+=0.1374161829; // importance = 0.110
226  if ((inputValues[0]<2.865738869)&&(1.206904054<inputValues[3])) rval+=0.1293777376; // importance = 0.101
227  if ((inputValues[0]<2.865738869)) rval+=-0.1926274403; // importance = 0.092
228  if ((inputValues[0]<0.6637439728)&&(-0.237038821<inputValues[1])&&(0.2456704378<inputValues[3])) rval+=0.1433609086; // importance = 0.087
229  if ((-0.07025432587<inputValues[0])&&(inputValues[1]<0.3575037718)) rval+=-0.08498720074; // importance = 0.086
230  if ((-1.044834495<inputValues[2])) rval+=-0.1045792093; // importance = 0.083
231  if ((0.02489273809<inputValues[1])&&(inputValues[2]<0.7852135301)&&(inputValues[3]<0.605740428)) rval+=-0.08330828564; // importance = 0.082
232  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])&&(inputValues[3]<1.066591382)) rval+=-0.09245421628; // importance = 0.079
233  if ((inputValues[0]<-0.3790929615)&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])) rval+=0.09276894678; // importance = 0.071
234  if ((0.2200206369<inputValues[1])&&(inputValues[1]<0.774100244)&&(-1.044834495<inputValues[2])&&(inputValues[3]<0.196363762)) rval+=-0.1209340777; // importance = 0.069
235  if ((-0.3903895915<inputValues[0])&&(-0.1605666727<inputValues[1])&&(inputValues[2]<1.173201919)) rval+=-0.06948582893; // importance = 0.066
236  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])&&(inputValues[2]<1.375104785)&&(inputValues[3]<1.066591382)) rval+=-0.07751604737; // importance = 0.066
237  if ((0.6637439728<inputValues[0])&&(1.386405587<inputValues[3])) rval+=-0.08062844867; // importance = 0.064
238  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])) rval+=-0.0618347695; // importance = 0.063
239  if ((inputValues[0]<1.397742271)&&(-0.7250279784<inputValues[1])) rval+=0.05880617942; // importance = 0.063
240  if ((-0.4885016978<inputValues[0])&&(-1.044834495<inputValues[2])&&(1.261463404<inputValues[3])) rval+=-0.06993446415; // importance = 0.060
241  if ((-0.5954368114<inputValues[1])&&(inputValues[2]<-0.6012272835)) rval+=-0.06613817409; // importance = 0.057
242  if ((-0.876881063<inputValues[0])&&(-0.1346641928<inputValues[1])&&(0.605740428<inputValues[3])) rval+=-0.0585316703; // importance = 0.050
243  if ((-0.5954368114<inputValues[1])&&(inputValues[1]<-0.06197149307)&&(inputValues[2]<0.2926391363)) rval+=-0.05672564152; // importance = 0.039
244  if ((0.2200206369<inputValues[1])&&(0.7960036993<inputValues[3])) rval+=-0.05359126258; // importance = 0.039
245  if ((inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])) rval+=0.03045857436; // importance = 0.033
246  if ((0.2200206369<inputValues[1])&&(-1.044834495<inputValues[2])&&(0.196363762<inputValues[3])) rval+=0.03558506898; // importance = 0.031
247  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])&&(1.066591382<inputValues[3])) rval+=0.03176435311; // importance = 0.024
248  if ((-1.810295105<inputValues[2])&&(inputValues[2]<-1.044834495)) rval+=-0.02569197519; // importance = 0.017
249  if ((inputValues[0]<0.6637439728)&&(inputValues[1]<-0.237038821)&&(0.2456704378<inputValues[3])) rval+=0.0229313273; // importance = 0.011
250  if ((-1.411566615<inputValues[3])) rval+=-0.0154040687; // importance = 0.010
251  if ((inputValues[1]<-0.1605666727)&&(inputValues[2]<1.173201919)) rval+=-0.008607035181; // importance = 0.009
252  if ((-0.3790929615<inputValues[0])&&(inputValues[1]<0.2200206369)&&(-1.044834495<inputValues[2])&&(inputValues[2]<0.340533644)&&(1.066591382<inputValues[3])) rval+=0.02734064113; // importance = 0.005
253  //
254  // here follows all linear terms
255  // at the end of each line, the relative importance of the term is given
256  //
257  rval+=-0.2303439238*std::min( double(3.434663296), std::max( double(inputValues[0]), double(-3.440958977))); // importance = 0.901
258  rval+=-0.03967082836*std::min( double(2.07433629), std::max( double(inputValues[1]), double(-2.206938982))); // importance = 0.097
259  rval+=0.00702630339*std::min( double(2.088175535), std::max( double(inputValues[2]), double(-2.172396898))); // importance = 0.017
260  rval+=0.3518686371*std::min( double(2.604138613), std::max( double(inputValues[3]), double(-2.361749649))); // importance = 1.000
261  return rval;
262 }
263  inline double ReadRuleFit::GetMvaValue( const std::vector<double>& inputValues ) const
264  {
265  // classifier response value
266  double retval = 0;
267 
268  // classifier response, sanity check first
269  if (!IsStatusClean()) {
270  std::cout << "Problem in class \"" << fClassName << "\": cannot return classifier response"
271  << " because status is dirty" << std::endl;
272  retval = 0;
273  }
274  else {
275  if (IsNormalised()) {
276  // normalise variables
277  std::vector<double> iV;
278  iV.reserve(inputValues.size());
279  int ivar = 0;
280  for (std::vector<double>::const_iterator varIt = inputValues.begin();
281  varIt != inputValues.end(); varIt++, ivar++) {
282  iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));
283  }
284  retval = GetMvaValue__( iV );
285  }
286  else {
287  retval = GetMvaValue__( inputValues );
288  }
289  }
290 
291  return retval;
292  }
float xmin
Definition: THbookFile.cxx:93
Type GetType(const std::string &Name)
Definition: Systematics.cxx:34
Double_t x[n]
Definition: legend1.C:17
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
float xmax
Definition: THbookFile.cxx:93
PyObject * fType