Logo ROOT   6.10/09
Reference Guide
MethodPyRandomForest.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/pymva $Id$
2 // Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodPyRandomForest *
8  * Web : http://oproject.org *
9  * *
10  * Description: *
 11  * Random Forest Classifier from scikit-learn *
12  * *
13  * *
14  * Redistribution and use in source and binary forms, with or without *
15  * modification, are permitted according to the terms listed in LICENSE *
16  * (http://tmva.sourceforge.net/LICENSE) *
17  * *
18  **********************************************************************************/
19 #include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE
21 
22 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
23 #include <numpy/arrayobject.h>
24 
25 #include "TMVA/Configurable.h"
26 #include "TMVA/ClassifierFactory.h"
27 #include "TMVA/Config.h"
28 #include "TMVA/DataSet.h"
29 #include "TMVA/Event.h"
30 #include "TMVA/IMethod.h"
31 #include "TMVA/MsgLogger.h"
32 #include "TMVA/PDF.h"
33 #include "TMVA/Ranking.h"
34 #include "TMVA/Results.h"
35 #include "TMVA/Tools.h"
36 #include "TMVA/Types.h"
38 
39 #include "Riostream.h"
40 #include "TMath.h"
41 #include "TMatrix.h"
42 #include "TMatrixD.h"
43 #include "TVectorD.h"
44 
45 #include <iomanip>
46 #include <fstream>
47 
48 using namespace TMVA;
49 
50 REGISTER_METHOD(PyRandomForest)
51 
53 
54 //_______________________________________________________________________
56  const TString &methodTitle,
57  DataSetInfo &dsi,
58  const TString &theOption) :
59  PyMethodBase(jobName, Types::kPyRandomForest, methodTitle, dsi, theOption),
60  fNestimators(10),
61  fCriterion("gini"),
62  fMaxDepth("None"),
63  fMinSamplesSplit(2),
64  fMinSamplesLeaf(1),
65  fMinWeightFractionLeaf(0),
66  fMaxFeatures("'auto'"),
67  fMaxLeafNodes("None"),
68  fBootstrap(kTRUE),
69  fOobScore(kFALSE),
70  fNjobs(1),
71  fRandomState("None"),
72  fVerbose(0),
73  fWarmStart(kFALSE),
74  fClassWeight("None")
75 {
76 }
77 
78 //_______________________________________________________________________
//// Constructor used when instantiating the method from a weight file
//// (reader mode). Forwards to PyMethodBase with Types::kPyRandomForest and
//// initializes every scikit-learn RandomForestClassifier option to the
//// defaults documented in DeclareOptions() below.
79 MethodPyRandomForest::MethodPyRandomForest(DataSetInfo &theData, const TString &theWeightFile)
80  : PyMethodBase(Types::kPyRandomForest, theData, theWeightFile),
81  fNestimators(10), // number of trees in the forest (default=10)
82  fCriterion("gini"), // split-quality measure: 'gini' or 'entropy'
83  fMaxDepth("None"), // Python literal: unlimited depth when None
84  fMinSamplesSplit(2), // min samples required to split an internal node
85  fMinSamplesLeaf(1), // min samples in a newly created leaf
86  fMinWeightFractionLeaf(0), // min weighted fraction of input samples at a leaf
87  fMaxFeatures("'auto'"), // quoted so it evaluates to a Python string literal
88  fMaxLeafNodes("None"), // Python literal: unlimited leaf nodes when None
89  fBootstrap(kTRUE), // use bootstrap samples when building trees
90  fOobScore(kFALSE), // no out-of-bag generalization estimate by default
91  fNjobs(1), // number of parallel jobs for fit/predict
92  fRandomState("None"), // Python literal: seed / RandomState / None
93  fVerbose(0), // verbosity of the tree building process
94  fWarmStart(kFALSE), // do not reuse the previous fit's estimators
95  fClassWeight("None") // Python literal: per-class weights expression
96 {
97 }
98 
99 
100 //_______________________________________________________________________
102 {
103 }
104 
105 //_______________________________________________________________________
107 {
108  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
109  if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
110  return kFALSE;
111 }
112 
113 //_______________________________________________________________________
115 {
117 
118  DeclareOptionRef(fNestimators, "NEstimators", "Integer, optional (default=10). The number of trees in the forest.");
119  DeclareOptionRef(fCriterion, "Criterion", "String, optional (default='gini') \
120  The function to measure the quality of a split. Supported criteria are \
121  'gini' for the Gini impurity and 'entropy' for the information gain. \
122  Note: this parameter is tree-specific.");
123 
124  DeclareOptionRef(fMaxDepth, "MaxDepth", "integer or None, optional (default=None) \
125  The maximum depth of the tree. If None, then nodes are expanded until \
126  all leaves are pure or until all leaves contain less than \
127  min_samples_split samples. \
128  Ignored if ``max_leaf_nodes`` is not None.");
129 
130  DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "integer, optional (default=2)\
131  The minimum number of samples required to split an internal node.");
132 
133  DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "integer, optional (default=1) \
134  The minimum number of samples in newly created leaves. A split is \
135  discarded if after the split, one of the leaves would contain less then \
136  ``min_samples_leaf`` samples.");
137  DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "//float, optional (default=0.) \
138  The minimum weighted fraction of the input samples required to be at a \
139  leaf node.");
140  DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split");
141 
142  DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "int or None, optional (default=None)\
143  Grow trees with ``max_leaf_nodes`` in best-first fashion.\
144  Best nodes are defined as relative reduction in impurity.\
145  If None then unlimited number of leaf nodes.\
146  If not None then ``max_depth`` will be ignored.");
147 
148  DeclareOptionRef(fBootstrap, "Bootstrap", "boolean, optional (default=True) \
149  Whether bootstrap samples are used when building trees.");
150 
151  DeclareOptionRef(fOobScore, "OoBScore", " bool Whether to use out-of-bag samples to estimate\
152  the generalization error.");
153 
154  DeclareOptionRef(fNjobs, "NJobs", " integer, optional (default=1) \
155  The number of jobs to run in parallel for both `fit` and `predict`. \
156  If -1, then the number of jobs is set to the number of cores.");
157 
158  DeclareOptionRef(fRandomState, "RandomState", "int, RandomState instance or None, optional (default=None)\
159  If int, random_state is the seed used by the random number generator;\
160  If RandomState instance, random_state is the random number generator;\
161  If None, the random number generator is the RandomState instance used\
162  by `np.random`.");
163 
164  DeclareOptionRef(fVerbose, "Verbose", "int, optional (default=0)\
165  Controls the verbosity of the tree building process.");
166 
167  DeclareOptionRef(fWarmStart, "WarmStart", "bool, optional (default=False)\
168  When set to ``True``, reuse the solution of the previous call to fit\
169  and add more estimators to the ensemble, otherwise, just fit a whole\
170  new forest.");
171 
172  DeclareOptionRef(fClassWeight, "ClassWeight", "dict, list of dicts, \"auto\", \"subsample\" or None, optional\
173  Weights associated with classes in the form ``{class_label: weight}``.\
174  If not given, all classes are supposed to have weight one. For\
175  multi-output problems, a list of dicts can be provided in the same\
176  order as the columns of y.\
177  The \"auto\" mode uses the values of y to automatically adjust\
178  weights inversely proportional to class frequencies in the input data.\
179  The \"subsample\" mode is the same as \"auto\" except that weights are\
180  computed based on the bootstrap sample for every tree grown.\
181  For multi-output, the weights of each column of y will be multiplied.\
182  Note that these weights will be multiplied with sample_weight (passed\
183  through the fit method) if sample_weight is specified.");
184 
185  DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
186  "Store trained classifier in this file");
187 }
188 
189 //_______________________________________________________________________
190 // Check options and load them to local python namespace
192 {
193  if (fNestimators <= 0) {
194  Log() << kFATAL << " NEstimators <=0... that does not work !! " << Endl;
195  }
197  PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);
198 
199  if (fCriterion != "gini" && fCriterion != "entropy") {
200  Log() << kFATAL << Form(" Criterion = %s... that does not work !! ", fCriterion.Data())
201  << " The options are `gini` or `entropy`." << Endl;
202  }
203  pCriterion = Eval(Form("'%s'", fCriterion.Data()));
204  PyDict_SetItemString(fLocalNS, "criterion", pCriterion);
205 
207  PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);
208  if (!pMaxDepth) {
209  Log() << kFATAL << Form(" MaxDepth = %s... that does not work !! ", fMaxDepth.Data())
210  << " The options are None or integer." << Endl;
211  }
212 
213  if (fMinSamplesSplit < 0) {
214  Log() << kFATAL << " MinSamplesSplit < 0... that does not work !! " << Endl;
215  }
217  PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);
218 
219  if (fMinSamplesLeaf < 0) {
220  Log() << kFATAL << " MinSamplesLeaf < 0... that does not work !! " << Endl;
221  }
223  PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);
224 
225  if (fMinWeightFractionLeaf < 0) {
226  Log() << kERROR << " MinWeightFractionLeaf < 0... that does not work !! " << Endl;
227  }
229  PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);
230 
231  if (fMaxFeatures == "auto" || fMaxFeatures == "sqrt" || fMaxFeatures == "log2"){
232  fMaxFeatures = Form("'%s'", fMaxFeatures.Data());
233  }
235  PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);
236 
237  if (!pMaxFeatures) {
238  Log() << kFATAL << Form(" MaxFeatures = %s... that does not work !! ", fMaxFeatures.Data())
239  << "int, float, string or None, optional (default='auto')"
240  << "The number of features to consider when looking for the best split:"
241  << "If int, then consider `max_features` features at each split."
242  << "If float, then `max_features` is a percentage and"
243  << "`int(max_features * n_features)` features are considered at each split."
244  << "If 'auto', then `max_features=sqrt(n_features)`."
245  << "If 'sqrt', then `max_features=sqrt(n_features)`."
246  << "If 'log2', then `max_features=log2(n_features)`."
247  << "If None, then `max_features=n_features`." << Endl;
248  }
249 
251  if (!pMaxLeafNodes) {
252  Log() << kFATAL << Form(" MaxLeafNodes = %s... that does not work !! ", fMaxLeafNodes.Data())
253  << " The options are None or integer." << Endl;
254  }
255  PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);
256 
258  if (!pRandomState) {
259  Log() << kFATAL << Form(" RandomState = %s... that does not work !! ", fRandomState.Data())
260  << "If int, random_state is the seed used by the random number generator;"
261  << "If RandomState instance, random_state is the random number generator;"
262  << "If None, the random number generator is the RandomState instance used by `np.random`." << Endl;
263  }
264  PyDict_SetItemString(fLocalNS, "randomState", pRandomState);
265 
267  if (!pClassWeight) {
268  Log() << kFATAL << Form(" ClassWeight = %s... that does not work !! ", fClassWeight.Data())
269  << "dict, list of dicts, 'auto', 'subsample' or None, optional" << Endl;
270  }
271  PyDict_SetItemString(fLocalNS, "classWeight", pClassWeight);
272 
273  if(fNjobs < 1) {
274  Log() << kFATAL << Form(" NJobs = %i... that does not work !! ", fNjobs)
275  << "Value has to be greater than zero." << Endl;
276  }
277  pNjobs = Eval(Form("%i", fNjobs));
278  PyDict_SetItemString(fLocalNS, "nJobs", pNjobs);
279 
280  pBootstrap = Eval(Form("%i", UInt_t(fBootstrap)));
281  PyDict_SetItemString(fLocalNS, "bootstrap", pBootstrap);
282  pOobScore = Eval(Form("%i", UInt_t(fOobScore)));
283  PyDict_SetItemString(fLocalNS, "oobScore", pOobScore);
284  pVerbose = Eval(Form("%i", fVerbose));
285  PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
286  pWarmStart = Eval(Form("%i", UInt_t(fWarmStart)));
287  PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);
288 
289  // If no filename is given, set default
290  if(fFilenameClassifier.IsNull())
291  {
292  fFilenameClassifier = GetWeightFileDir() + "/PyRFModel_" + GetName() + ".PyData";
293  }
294 }
295 
296 //_______________________________________________________________________
298 {
299  _import_array(); //require to use numpy arrays
300 
301  // Check options and load them to local python namespace
302  ProcessOptions();
303 
304  // Import module for random forest classifier
305  PyRunString("import sklearn.ensemble");
306 
307  // Get data properties
308  fNvars = GetNVariables();
310 }
311 
312 //_______________________________________________________________________
314 {
315  // Load training data (data, classes, weights) to python arrays
316  int fNrowsTraining = Data()->GetNTrainingEvents(); //every row is an event, a class type and a weight
317  npy_intp dimsData[2];
318  dimsData[0] = fNrowsTraining;
319  dimsData[1] = fNvars;
320  fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
321  PyDict_SetItemString(fLocalNS, "trainData", (PyObject*)fTrainData);
322  float *TrainData = (float *)(PyArray_DATA(fTrainData));
323 
324  npy_intp dimsClasses = (npy_intp) fNrowsTraining;
325  fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
326  PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject*)fTrainDataClasses);
327  float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));
328 
329  fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
330  PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject*)fTrainDataWeights);
331  float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));
332 
333  for (int i = 0; i < fNrowsTraining; i++) {
334  // Fill training data matrix
335  const TMVA::Event *e = Data()->GetTrainingEvent(i);
336  for (UInt_t j = 0; j < fNvars; j++) {
337  TrainData[j + i * fNvars] = e->GetValue(j);
338  }
339 
340  // Fill target classes
341  TrainDataClasses[i] = e->GetClass();
342 
343  // Get event weight
344  TrainDataWeights[i] = e->GetWeight();
345  }
346 
347  // Create classifier object
348  PyRunString("classifier = sklearn.ensemble.RandomForestClassifier(bootstrap=bootstrap, class_weight=classWeight, criterion=criterion, max_depth=maxDepth, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, min_samples_leaf=minSamplesLeaf, min_samples_split=minSamplesSplit, min_weight_fraction_leaf=minWeightFractionLeaf, n_estimators=nEstimators, n_jobs=nJobs, oob_score=oobScore, random_state=randomState, verbose=verbose, warm_start=warmStart)",
349  "Failed to setup classifier");
350 
351  // Fit classifier
352  // NOTE: We dump the output to a variable so that the call does not pollute stdout
353  PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");
354 
355  // Store classifier
356  fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
357  if(fClassifier == 0) {
358  Log() << kFATAL << "Can't create classifier object from RandomForestClassifier" << Endl;
359  Log() << Endl;
360  }
361 
362  if (IsModelPersistence()) {
363  Log() << Endl;
364  Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
365  Log() << Endl;
367  }
368 }
369 
370 //_______________________________________________________________________
372 {
374 }
375 
376 //_______________________________________________________________________
//// Evaluate the trained classifier on the events [firstEvt, lastEvt) of the
//// current data set and return the per-event signal probability.
//// The event range is clamped to the available number of events; negative
//// firstEvt is treated as 0. Results are cached in the mvaValues member,
//// which is returned by value.
377 std::vector<Double_t> MethodPyRandomForest::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t)
378 {
379  // Load model if not already done
380  if (fClassifier == 0) ReadModelFromFile();
381 
382  // Determine number of events
// NOTE(review): the declaration/initialization of nEvents (original line 383,
// presumably from Data()->GetNEvents()) is missing from this extracted
// listing — confirm against the original source.
384  if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
385  if (firstEvt < 0) firstEvt = 0;
386  nEvents = lastEvt-firstEvt;
387 
388  // Get data: build an (nEvents x fNvars) float32 numpy array of input variables
389  npy_intp dims[2];
390  dims[0] = nEvents;
391  dims[1] = fNvars;
392  PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
393  float *pValue = (float *)(PyArray_DATA(pEvent));
394 
395  for (Int_t ievt=0; ievt<nEvents; ievt++) {
396  Data()->SetCurrentEvent(ievt);
397  const TMVA::Event *e = Data()->GetEvent();
398  for (UInt_t i = 0; i < fNvars; i++) {
399  pValue[ievt * fNvars + i] = e->GetValue(i);
400  }
401  }
402 
403  // Get prediction from classifier: predict_proba returns one probability
404  // per class for each event (row-major, fNoutputs columns per event)
404  PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
405  double *proba = (double *)(PyArray_DATA(result));
406 
407  // Return signal probabilities: pick the kSignal column of each event's row
408  if(Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
409  for (int i = 0; i < nEvents; ++i) {
410  mvaValues[i] = proba[fNoutputs*i + TMVA::Types::kSignal];
411  }
412 
413  // Release the temporary numpy arrays (we own both references)
413  Py_DECREF(pEvent);
414  Py_DECREF(result);
415 
416  return mvaValues;
417 }
418 
419 //_______________________________________________________________________
421 {
422  // cannot determine error
423  NoErrorCalc(errLower, errUpper);
424 
425  // Load model if not already done
426  if (fClassifier == 0) ReadModelFromFile();
427 
428  // Get current event and load to python array
429  const TMVA::Event *e = Data()->GetEvent();
430  npy_intp dims[2];
431  dims[0] = 1;
432  dims[1] = fNvars;
433  PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
434  float *pValue = (float *)(PyArray_DATA(pEvent));
435  for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);
436 
437  // Get prediction from classifier
438  PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
439  double *proba = (double *)(PyArray_DATA(result));
440 
441  // Return MVA value
442  Double_t mvaValue;
443  mvaValue = proba[TMVA::Types::kSignal]; // getting signal probability
444 
445  Py_DECREF(result);
446  Py_DECREF(pEvent);
447 
448  return mvaValue;
449 }
450 
451 //_______________________________________________________________________
453 {
454  // Load model if not already done
455  if (fClassifier == 0) ReadModelFromFile();
456 
457  // Get current event and load to python array
458  const TMVA::Event *e = Data()->GetEvent();
459  npy_intp dims[2];
460  dims[0] = 1;
461  dims[1] = fNvars;
462  PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
463  float *pValue = (float *)(PyArray_DATA(pEvent));
464  for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);
465 
466  // Get prediction from classifier
467  PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
468  double *proba = (double *)(PyArray_DATA(result));
469 
470  // Return MVA values
471  if(UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
472  for(UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];
473 
474  Py_DECREF(pEvent);
475  Py_DECREF(result);
476 
477  return classValues;
478 }
479 
480 //_______________________________________________________________________
482 {
483  if (!PyIsInitialized()) {
484  PyInitialize();
485  }
486 
487  Log() << Endl;
488  Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
489  Log() << Endl;
490 
491  // Load classifier from file
493  if(err != 0)
494  {
495  Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
496  }
497 
498  // Book classifier object in python dict
499  PyDict_SetItemString(fLocalNS, "classifier", fClassifier);
500 
501  // Load data properties
502  // NOTE: This has to be repeated here for the reader application
503  fNvars = GetNVariables();
505 }
506 
507 //_______________________________________________________________________
509 {
510  // Get feature importance from classifier as an array with length equal
511  // number of variables, higher value signals a higher importance
512  PyArrayObject* pRanking = (PyArrayObject*) PyObject_GetAttrString(fClassifier, "feature_importances_");
513  if(pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;
514 
515  // Fill ranking object and return it
516  fRanking = new Ranking(GetName(), "Variable Importance");
517  Double_t* rankingData = (Double_t*) PyArray_DATA(pRanking);
518  for(UInt_t iVar=0; iVar<fNvars; iVar++){
519  fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
520  }
521 
522  Py_DECREF(pRanking);
523 
524  return fRanking;
525 }
526 
527 //_______________________________________________________________________
529 {
530  // typical length of text line:
531  // "|--------------------------------------------------------------|"
532  Log() << "A random forest is a meta estimator that fits a number of decision" << Endl;
533  Log() << "tree classifiers on various sub-samples of the dataset and use" << Endl;
534  Log() << "averaging to improve the predictive accuracy and control over-fitting." << Endl;
535  Log() << Endl;
536  Log() << "Check out the scikit-learn documentation for more information." << Endl;
537 }
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
std::vector< Double_t > mvaValues
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:99
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
long long Long64_t
Definition: RtypesCore.h:69
PyObject * fClassifier
Definition: PyMethodBase.h:120
MsgLogger & Log() const
Definition: Configurable.h:122
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
EAnalysisType
Definition: Types.h:125
Ranking for variables in method (implementation)
Definition: Ranking.h:48
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
static void Serialize(TString file, PyObject *classifier)
Serialize Python object.
PyArrayObject * fTrainDataClasses
Definition: PyMethodBase.h:124
static int PyIsInitialized()
Check Python interpreter initialization status.
static void PyInitialize()
Initialize Python interpreter.
const TString & GetInputLabel(Int_t i) const
Definition: MethodBase.h:334
const TString & GetWeightFileDir() const
Definition: MethodBase.h:474
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=Py_single_input)
Execute Python code from string.
DataSet * Data() const
Definition: MethodBase.h:393
UInt_t GetClass() const
Definition: Event.h:81
PyObject * Eval(TString code)
Evaluate Python code.
DataSetInfo & DataInfo() const
Definition: MethodBase.h:394
Class that contains all the data information.
Definition: DataSetInfo.h:60
PyArrayObject * fTrainDataWeights
Definition: PyMethodBase.h:123
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
#define None
Definition: TGWin32.h:55
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: DataSet.h:85
const int nEvents
Definition: testRooFit.cxx:42
const char * GetName() const
Definition: MethodBase.h:318
MethodPyRandomForest(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
PyArrayObject * fTrainData
Definition: PyMethodBase.h:122
Tools & gTools()
UInt_t GetNVariables() const
Definition: MethodBase.h:329
const Bool_t kFALSE
Definition: RtypesCore.h:92
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:237
#define ClassImp(name)
Definition: Rtypes.h:336
static Int_t UnSerialize(TString file, PyObject **obj)
Unserialize Python object.
double Double_t
Definition: RtypesCore.h:55
int type
Definition: TGX11.cxx:120
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
Ranking * fRanking
Definition: MethodBase.h:569
virtual void AddRank(const Rank &rank)
Add a new rank take ownership of it.
Definition: Ranking.cxx:86
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:601
PyObject * fLocalNS
Definition: PyMethodBase.h:143
virtual void TestClassification()
initialization
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
double result[121]
std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
const Bool_t kTRUE
Definition: RtypesCore.h:91
std::vector< Float_t > classValues
virtual void TestClassification()
initialization
const Event * GetEvent() const
Definition: DataSet.cxx:202
_object PyObject
Definition: TPyArg.h:20
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:829
std::vector< Float_t > & GetMulticlassValues()
Bool_t IsModelPersistence()
Definition: MethodBase.h:367