MethodPyGTB.cxx
// @(#)root/tmva/pymva $Id$
// Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis
 * Package: TMVA
 * Class  : MethodPyGTB
 * Web    : http://oproject.org
 *
 * Description:
 *      GradientBoostingClassifier classifier from scikit-learn
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted according to the terms listed in LICENSE
 * (http://tmva.sourceforge.net/LICENSE)
 **********************************************************************************/

#include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE
#include "TMVA/MethodPyGTB.h"

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#include "TMVA/Configurable.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/DataSet.h"
#include "TMVA/Event.h"
#include "TMVA/IMethod.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/PDF.h"
#include "TMVA/Ranking.h"
#include "TMVA/Results.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"
#include "TMVA/Timer.h"

#include "Riostream.h"
#include "TMath.h"
#include "TMatrix.h"
#include "TMatrixD.h"
#include "TVectorD.h"

#include <iomanip>
#include <fstream>
using namespace TMVA;

namespace TMVA {
namespace Internal {
class PyGILRAII {
   PyGILState_STATE m_GILState;

public:
   PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
   ~PyGILRAII() { PyGILState_Release(m_GILState); }
};
} // namespace Internal
} // namespace TMVA
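
// A PyGILRAII object acquires the Python GIL in its constructor and releases
// it in its destructor, so any scope that calls into the Python C API can be
// protected by a single stack variable. A minimal usage sketch (the function
// name is illustrative):
//
//    void SomeMethodTouchingPython()
//    {
//       TMVA::Internal::PyGILRAII raii; // GIL held from here ...
//       // ... Python C API calls are safe in this scope ...
//    }                                  // ... released at scope exit
//
// MethodPyGTB::Init() below uses exactly this pattern.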

REGISTER_METHOD(PyGTB)

ClassImp(MethodPyGTB);

//_______________________________________________________________________
MethodPyGTB::MethodPyGTB(const TString &jobName,
                         const TString &methodTitle,
                         DataSetInfo &dsi,
                         const TString &theOption) :
   PyMethodBase(jobName, Types::kPyGTB, methodTitle, dsi, theOption),
   fLoss("deviance"),
   fLearningRate(0.1),
   fNestimators(100),
   fSubsample(1.0),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0.0),
   fMaxDepth(3),
   fInit("None"),
   fRandomState("None"),
   fMaxFeatures("None"),
   fVerbose(0),
   fMaxLeafNodes("None"),
   fWarmStart(kFALSE)
{
}

//_______________________________________________________________________
MethodPyGTB::MethodPyGTB(DataSetInfo &theData, const TString &theWeightFile) :
   PyMethodBase(Types::kPyGTB, theData, theWeightFile),
   fLoss("deviance"),
   fLearningRate(0.1),
   fNestimators(100),
   fSubsample(1.0),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0.0),
   fMaxDepth(3),
   fInit("None"),
   fRandomState("None"),
   fMaxFeatures("None"),
   fVerbose(0),
   fMaxLeafNodes("None"),
   fWarmStart(kFALSE)
{
}

//_______________________________________________________________________
MethodPyGTB::~MethodPyGTB()
{
}

//_______________________________________________________________________
Bool_t MethodPyGTB::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/)
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
   return kFALSE;
}

//_______________________________________________________________________
void MethodPyGTB::DeclareOptions()
{
   MethodBase::DeclareCompatibilityOptions();

   DeclareOptionRef(fLoss, "Loss", "{'deviance', 'exponential'}, optional (default='deviance')\
      loss function to be optimized. 'deviance' refers to\
      deviance (= logistic regression) for classification\
      with probabilistic outputs. For loss 'exponential' gradient\
      boosting recovers the AdaBoost algorithm.");

   DeclareOptionRef(fLearningRate, "LearningRate", "float, optional (default=0.1)\
      learning rate shrinks the contribution of each tree by `learning_rate`.\
      There is a trade-off between learning_rate and n_estimators.");

   DeclareOptionRef(fNestimators, "NEstimators", "int (default=100)\
      The number of boosting stages to perform. Gradient boosting\
      is fairly robust to over-fitting, so a large number usually\
      results in better performance.");

   DeclareOptionRef(fSubsample, "Subsample", "float, optional (default=1.0)\
      The fraction of samples to be used for fitting the individual base\
      learners. If smaller than 1.0 this results in Stochastic Gradient\
      Boosting. `subsample` interacts with the parameter `n_estimators`.\
      Choosing `subsample < 1.0` leads to a reduction of variance\
      and an increase in bias.");

   DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "integer, optional (default=2)\
      The minimum number of samples required to split an internal node.");

   DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "integer, optional (default=1)\
      The minimum number of samples in newly created leaves. A split is\
      discarded if, after the split, one of the leaves would contain less than\
      ``min_samples_leaf`` samples.");

   DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "float, optional (default=0.)\
      The minimum weighted fraction of the input samples required to be at a\
      leaf node.");

   DeclareOptionRef(fMaxDepth, "MaxDepth", "integer, optional (default=3)\
      The maximum depth of the tree. If None, then nodes are expanded until\
      all leaves are pure or until all leaves contain less than\
      min_samples_split samples.\
      Ignored if ``max_leaf_nodes`` is not None.");

   DeclareOptionRef(fInit, "Init", "BaseEstimator, None, optional (default=None)\
      An estimator object that is used to compute the initial\
      predictions. ``init`` has to provide ``fit`` and ``predict``.\
      If None it uses ``loss.init_estimator``.");

   DeclareOptionRef(fRandomState, "RandomState", "int, RandomState instance or None, optional (default=None)\
      If int, random_state is the seed used by the random number generator;\
      If RandomState instance, random_state is the random number generator;\
      If None, the random number generator is the RandomState instance used\
      by `np.random`.");

   DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split");

   DeclareOptionRef(fVerbose, "Verbose", "int, optional (default=0)\
      Controls the verbosity of the tree building process.");

   DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "int or None, optional (default=None)\
      Grow trees with ``max_leaf_nodes`` in best-first fashion;\
      best nodes are defined as relative reduction in impurity.\
      If None, then there is an unlimited number of leaf nodes.\
      If not None, then ``max_depth`` will be ignored.");

   DeclareOptionRef(fWarmStart, "WarmStart", "bool, optional (default=False)\
      When set to ``True``, reuse the solution of the previous call to fit\
      and add more estimators to the ensemble; otherwise, just fit a whole\
      new forest.");

   DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
                    "Store trained classifier in this file");
}
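
// A minimal booking sketch showing how these options are typically set from a
// TMVA application (assuming an existing Factory and DataLoader; the variable
// names `factory` and `dataloader` are illustrative):
//
//    factory->BookMethod(dataloader, TMVA::Types::kPyGTB, "PyGTB",
//                        "NEstimators=100:LearningRate=0.1:MaxDepth=3");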

//_______________________________________________________________________
// Check options and load them to local python namespace
void MethodPyGTB::ProcessOptions()
{
   if (fLoss != "deviance" && fLoss != "exponential") {
      Log() << kFATAL << Form("Loss = %s ... that does not work!", fLoss.Data())
            << " The options are 'deviance' or 'exponential'." << Endl;
   }
   pLoss = Eval(Form("'%s'", fLoss.Data()));
   PyDict_SetItemString(fLocalNS, "loss", pLoss);

   if (fLearningRate <= 0) {
      Log() << kFATAL << "LearningRate <= 0 ... that does not work!" << Endl;
   }
   pLearningRate = Eval(Form("%f", fLearningRate));
   PyDict_SetItemString(fLocalNS, "learningRate", pLearningRate);

   if (fNestimators <= 0) {
      Log() << kFATAL << "NEstimators <= 0 ... that does not work!" << Endl;
   }
   pNestimators = Eval(Form("%i", fNestimators));
   PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);

   if (fMinSamplesSplit < 0) {
      Log() << kFATAL << "MinSamplesSplit < 0 ... that does not work!" << Endl;
   }
   pMinSamplesSplit = Eval(Form("%i", fMinSamplesSplit));
   PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);

   if (fSubsample < 0) {
      Log() << kFATAL << "Subsample < 0 ... that does not work!" << Endl;
   }
   pSubsample = Eval(Form("%f", fSubsample));
   PyDict_SetItemString(fLocalNS, "subsample", pSubsample);

   if (fMinSamplesLeaf < 0) {
      Log() << kFATAL << "MinSamplesLeaf < 0 ... that does not work!" << Endl;
   }
   pMinSamplesLeaf = Eval(Form("%i", fMinSamplesLeaf));
   PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);

   if (fMinWeightFractionLeaf < 0) {
      Log() << kFATAL << "MinWeightFractionLeaf < 0 ... that does not work!" << Endl;
   }
   pMinWeightFractionLeaf = Eval(Form("%f", fMinWeightFractionLeaf));
   PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);

   if (fMaxDepth <= 0) {
      Log() << kFATAL << "MaxDepth <= 0 ... that does not work!" << Endl;
   }
   pMaxDepth = Eval(Form("%i", fMaxDepth));
   PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);

   pInit = Eval(fInit);
   if (!pInit) {
      Log() << kFATAL << Form("Init = %s ... that does not work!", fInit.Data())
            << " The options are None or BaseEstimator, which is an estimator object that"
            << " is used to compute the initial predictions. "
            << "'init' has to provide 'fit' and 'predict' methods."
            << " If None it uses 'loss.init_estimator'." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "init", pInit);

   pRandomState = Eval(fRandomState);
   if (!pRandomState) {
      Log() << kFATAL << Form("RandomState = %s ... that does not work!", fRandomState.Data())
            << " If int, random_state is the seed used by the random number generator;"
            << " If RandomState instance, random_state is the random number generator;"
            << " If None, the random number generator is the RandomState instance used by 'np.random'."
            << Endl;
   }
   PyDict_SetItemString(fLocalNS, "randomState", pRandomState);

   if (fMaxFeatures == "auto" || fMaxFeatures == "sqrt" || fMaxFeatures == "log2") {
      fMaxFeatures = Form("'%s'", fMaxFeatures.Data());
   }
   pMaxFeatures = Eval(fMaxFeatures);
   if (!pMaxFeatures) {
      Log() << kFATAL << Form("MaxFeatures = %s ... that does not work!", fMaxFeatures.Data())
            << " int, float, string or None, optional (default='auto')."
            << " The number of features to consider when looking for the best split:"
            << " If int, then consider `max_features` features at each split."
            << " If float, then `max_features` is a percentage and"
            << " `int(max_features * n_features)` features are considered at each split."
            << " If 'auto', then `max_features=sqrt(n_features)`."
            << " If 'sqrt', then `max_features=sqrt(n_features)`."
            << " If 'log2', then `max_features=log2(n_features)`."
            << " If None, then `max_features=n_features`." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);

   pMaxLeafNodes = Eval(fMaxLeafNodes);
   if (!pMaxLeafNodes) {
      Log() << kFATAL << Form("MaxLeafNodes = %s ... that does not work!", fMaxLeafNodes.Data())
            << " The options are None or integer." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);

   pVerbose = Eval(Form("%i", fVerbose));
   PyDict_SetItemString(fLocalNS, "verbose", pVerbose);

   pWarmStart = Eval(Form("%i", UInt_t(fWarmStart)));
   PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);

   // If no filename is given, set default
   if (fFilenameClassifier.IsNull()) {
      fFilenameClassifier = GetWeightFileDir() + "/PyGTBModel_" + GetName() + ".PyData";
   }
}
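
// Note on the option bridge above: Eval() hands its argument to the embedded
// Python interpreter as an expression, so Eval("'deviance'") yields a Python
// str, Eval(Form("%i", fNestimators)) an int, and Eval("None") the None
// singleton. Each resulting PyObject is bound into fLocalNS under the keyword
// name that the PyRunString() calls in Train() expect.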

//_______________________________________________________________________
void MethodPyGTB::Init()
{
   TMVA::Internal::PyGILRAII raii;
   _import_array(); // required to use numpy arrays

   // Check options and load them to local python namespace
   ProcessOptions();

   // Import module for gradient tree boosting classifier
   PyRunString("import sklearn.ensemble");

   // Get data properties
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
void MethodPyGTB::Train()
{
   // Load training data (data, classes, weights) to python arrays
   int fNrowsTraining = Data()->GetNTrainingEvents(); // every row is an event, a class type and a weight
   npy_intp dimsData[2];
   dimsData[0] = fNrowsTraining;
   dimsData[1] = fNvars;
   PyArrayObject *fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainData", (PyObject *)fTrainData);
   float *TrainData = (float *)(PyArray_DATA(fTrainData));

   npy_intp dimsClasses = (npy_intp)fNrowsTraining;
   PyArrayObject *fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject *)fTrainDataClasses);
   float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));

   PyArrayObject *fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject *)fTrainDataWeights);
   float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));

   for (int i = 0; i < fNrowsTraining; i++) {
      // Fill training data matrix
      const TMVA::Event *e = Data()->GetTrainingEvent(i);
      for (UInt_t j = 0; j < fNvars; j++) {
         TrainData[j + i * fNvars] = e->GetValue(j);
      }

      // Fill target classes
      TrainDataClasses[i] = e->GetClass();

      // Get event weight
      TrainDataWeights[i] = e->GetWeight();
   }

   // Create classifier object
   PyRunString("classifier = sklearn.ensemble.GradientBoostingClassifier(loss=loss, learning_rate=learningRate, n_estimators=nEstimators, max_depth=maxDepth, min_samples_split=minSamplesSplit, min_samples_leaf=minSamplesLeaf, min_weight_fraction_leaf=minWeightFractionLeaf, subsample=subsample, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, init=init, verbose=verbose, warm_start=warmStart, random_state=randomState)",
               "Failed to setup classifier");

   // Fit classifier
   // NOTE: We dump the output to a variable so that the call does not pollute stdout
   PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");
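   // NOTE: scikit-learn's GradientBoostingClassifier.fit(X, y, sample_weight)
   // takes the per-event weights as its third positional argument, which is
   // how the TMVA event weights filled above enter the boosting loss.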

   // Store classifier
   fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
   if (fClassifier == 0) {
      Log() << kFATAL << "Can't create classifier object from GradientBoostingClassifier" << Endl;
      Log() << Endl;
   }

   if (IsModelPersistence()) {
      Log() << Endl;
      Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
      Log() << Endl;
      Serialize(fFilenameClassifier, fClassifier);
   }
}
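
// NOTE: The state file written by Serialize() in Train() is read back by
// ReadModelFromFile() below via UnSerialize(), which is what lets a reader
// application apply the trained classifier without retraining.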

//_______________________________________________________________________
void MethodPyGTB::TestClassification()
{
   MethodBase::TestClassification();
}

//_______________________________________________________________________
std::vector<Double_t> MethodPyGTB::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Determine number of events
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   // use timer
   Timer timer(nEvents, GetName(), kTRUE);

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ", DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;

   // Get data
   npy_intp dims[2];
   dims[0] = nEvents;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));

   for (Int_t ievt = 0; ievt < nEvents; ievt++) {
      Data()->SetCurrentEvent(ievt);
      const TMVA::Event *e = Data()->GetEvent();
      for (UInt_t i = 0; i < fNvars; i++) {
         pValue[ievt * fNvars + i] = e->GetValue(i);
      }
   }

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));
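   // NOTE: predict_proba returns one row of fNoutputs class probabilities per
   // event; PyArray_DATA exposes that (nEvents x fNoutputs) array flattened
   // row-major, which the indexing below relies on.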

   // Return signal probabilities
   if (Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
   for (int i = 0; i < nEvents; ++i) {
      mvaValues[i] = proba[fNoutputs * i + TMVA::Types::kSignal];
   }

   Py_DECREF(pEvent);
   Py_DECREF(result);

   if (logProgress) {
      Log() << kINFO
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << " " << Endl;
   }

   return mvaValues;
}

//_______________________________________________________________________
Double_t MethodPyGTB::GetMvaValue(Double_t *errLower, Double_t *errUpper)
{
   // cannot determine error
   NoErrorCalc(errLower, errUpper);

   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA value: the signal probability
   Double_t mvaValue = proba[TMVA::Types::kSignal];

   Py_DECREF(result);
   Py_DECREF(pEvent);

   return mvaValue;
}

//_______________________________________________________________________
std::vector<Float_t> &MethodPyGTB::GetMulticlassValues()
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA values
   if (UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
   for (UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];

   Py_DECREF(pEvent);
   Py_DECREF(result);

   return classValues;
}

//_______________________________________________________________________
void MethodPyGTB::ReadModelFromFile()
{
   if (!PyIsInitialized()) {
      PyInitialize();
   }

   Log() << Endl;
   Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
   Log() << Endl;

   // Load classifier from file
   Int_t err = UnSerialize(fFilenameClassifier, &fClassifier);
   if (err != 0) {
      Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
   }

   // Book classifier object in python dict
   PyDict_SetItemString(fLocalNS, "classifier", fClassifier);

   // Load data properties
   // NOTE: This has to be repeated here for the reader application
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
const Ranking *MethodPyGTB::CreateRanking()
{
   // Get feature importance from classifier as an array with length equal to
   // the number of variables; a higher value signals a higher importance
   PyArrayObject *pRanking = (PyArrayObject *)PyObject_GetAttrString(fClassifier, "feature_importances_");
   if (pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;

   // Fill ranking object and return it
   fRanking = new Ranking(GetName(), "Variable Importance");
   Double_t *rankingData = (Double_t *)PyArray_DATA(pRanking);
   for (UInt_t iVar = 0; iVar < fNvars; iVar++) {
      fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
   }

   Py_DECREF(pRanking);

   return fRanking;
}

//_______________________________________________________________________
void MethodPyGTB::GetHelpMessage() const
{
   // typical length of text line:
   //         "|--------------------------------------------------------------|"
   Log() << "A gradient tree boosting classifier builds a model from an ensemble" << Endl;
   Log() << "of decision trees, which are adapted at each boosting step to better" << Endl;
   Log() << "fit previously misclassified events." << Endl;
   Log() << Endl;
   Log() << "Check out the scikit-learn documentation for more information." << Endl;
}