MethodPyGTB.cxx
// @(#)root/tmva/pymva $Id$
// Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodPyGTB                                                          *
 * Web    : http://oproject.org                                                  *
 *                                                                                *
 * Description:                                                                   *
 *      GradientBoostingClassifier from scikit-learn                              *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                         *
 **********************************************************************************/

#include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE
#include "TMVA/MethodPyGTB.h"

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#include "TMVA/ClassifierFactory.h" // provides the REGISTER_METHOD macro used below
#include "TMVA/Configurable.h"
#include "TMVA/Config.h"
#include "TMVA/DataSet.h"
#include "TMVA/Event.h"
#include "TMVA/IMethod.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/PDF.h"
#include "TMVA/Ranking.h"
#include "TMVA/Results.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"
#include "TMVA/Timer.h"

#include "TMatrix.h"
using namespace TMVA;

namespace TMVA {
namespace Internal {
/// RAII guard: acquires the Python GIL on construction and releases it on destruction.
class PyGILRAII {
   PyGILState_STATE m_GILState;

public:
   PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
   ~PyGILRAII() { PyGILState_Release(m_GILState); }
};
} // namespace Internal
} // namespace TMVA

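// Usage sketch (illustrative, not part of the original file): any scope that
// calls into the Python C API can hold the GIL for its full lifetime, e.g.
//
//    void SomeMethod() {                       // hypothetical caller
//       TMVA::Internal::PyGILRAII raii;        // GIL acquired here
//       PyRun_SimpleString("print('hello')");  // safe to touch Python state
//    }                                         // GIL released on scope exit, even on early return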
REGISTER_METHOD(PyGTB)

ClassImp(MethodPyGTB);

//_______________________________________________________________________
MethodPyGTB::MethodPyGTB(const TString &jobName,
                         const TString &methodTitle,
                         DataSetInfo &dsi,
                         const TString &theOption) :
   PyMethodBase(jobName, Types::kPyGTB, methodTitle, dsi, theOption),
   fLoss("deviance"),
   fLearningRate(0.1),
   fNestimators(100),
   fSubsample(1.0),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0.0),
   fMaxDepth(3),
   fInit("None"),
   fRandomState("None"),
   fMaxFeatures("None"),
   fVerbose(0),
   fMaxLeafNodes("None"),
   fWarmStart(kFALSE)
{
}

//_______________________________________________________________________
MethodPyGTB::MethodPyGTB(DataSetInfo &theData, const TString &theWeightFile)
   : PyMethodBase(Types::kPyGTB, theData, theWeightFile),
   fLoss("deviance"),
   fLearningRate(0.1),
   fNestimators(100),
   fSubsample(1.0),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0.0),
   fMaxDepth(3),
   fInit("None"),
   fRandomState("None"),
   fMaxFeatures("None"),
   fVerbose(0),
   fMaxLeafNodes("None"),
   fWarmStart(kFALSE)
{
}

//_______________________________________________________________________
MethodPyGTB::~MethodPyGTB(void)
{
}

//_______________________________________________________________________
Bool_t MethodPyGTB::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
   return kFALSE;
}

//_______________________________________________________________________
void MethodPyGTB::DeclareOptions()
{
   DeclareOptionRef(fLoss, "Loss", "{'deviance', 'exponential'}, optional (default='deviance')\
      loss function to be optimized. 'deviance' refers to\
      deviance (= logistic regression) for classification\
      with probabilistic outputs. For loss 'exponential' gradient\
      boosting recovers the AdaBoost algorithm.");

   DeclareOptionRef(fLearningRate, "LearningRate", "float, optional (default=0.1)\
      learning rate shrinks the contribution of each tree by `learning_rate`.\
      There is a trade-off between learning_rate and n_estimators.");

   DeclareOptionRef(fNestimators, "NEstimators", "int (default=100)\
      The number of boosting stages to perform. Gradient boosting\
      is fairly robust to over-fitting, so a large number usually\
      results in better performance.");

   DeclareOptionRef(fSubsample, "Subsample", "float, optional (default=1.0)\
      The fraction of samples to be used for fitting the individual base\
      learners. If smaller than 1.0 this results in Stochastic Gradient\
      Boosting. `subsample` interacts with the parameter `n_estimators`.\
      Choosing `subsample < 1.0` leads to a reduction of variance\
      and an increase in bias.");

   DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "integer, optional (default=2)\
      The minimum number of samples required to split an internal node.");

   DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "integer, optional (default=1) \
      The minimum number of samples in newly created leaves. A split is \
      discarded if, after the split, one of the leaves would contain less than \
      ``min_samples_leaf`` samples.");

   DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "float, optional (default=0.0) \
      The minimum weighted fraction of the input samples required to be at a \
      leaf node.");

   DeclareOptionRef(fMaxDepth, "MaxDepth", "integer, optional (default=3) \
      The maximum depth of the individual trees, which limits the number \
      of nodes in each tree. \
      Ignored if ``max_leaf_nodes`` is not None.");

   DeclareOptionRef(fInit, "Init", "BaseEstimator, None, optional (default=None)\
      An estimator object that is used to compute the initial\
      predictions. ``init`` has to provide ``fit`` and ``predict``.\
      If None it uses ``loss.init_estimator``.");

   DeclareOptionRef(fRandomState, "RandomState", "int, RandomState instance or None, optional (default=None)\
      If int, random_state is the seed used by the random number generator;\
      If RandomState instance, random_state is the random number generator;\
      If None, the random number generator is the RandomState instance used\
      by `np.random`.");

   DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split");

   DeclareOptionRef(fVerbose, "Verbose", "int, optional (default=0)\
      Controls the verbosity of the tree building process.");

   DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "int or None, optional (default=None)\
      Grow trees with ``max_leaf_nodes`` in best-first fashion.\
      Best nodes are defined as relative reduction in impurity.\
      If None then unlimited number of leaf nodes.\
      If not None then ``max_depth`` will be ignored.");

   DeclareOptionRef(fWarmStart, "WarmStart", "bool, optional (default=False)\
      When set to ``True``, reuse the solution of the previous call to fit\
      and add more estimators to the ensemble, otherwise, just erase the\
      previous solution.");

   DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
                    "Store trained classifier in this file");
}

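// Usage sketch (not part of the original file; a hypothetical training macro):
// the options declared above are set through the colon-separated booking string, e.g.
//
//    factory->BookMethod(dataloader, TMVA::Types::kPyGTB, "PyGTB",
//                        "!V:NEstimators=200:LearningRate=0.05:MaxDepth=3");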
//_______________________________________________________________________
// Check options and load them to local python namespace
void MethodPyGTB::ProcessOptions()
{
   if (fLoss != "deviance" && fLoss != "exponential") {
      Log() << kFATAL << Form("Loss = %s ... that does not work!", fLoss.Data())
            << " The options are 'deviance' or 'exponential'." << Endl;
   }
   pLoss = Eval(Form("'%s'", fLoss.Data()));
   PyDict_SetItemString(fLocalNS, "loss", pLoss);

   if (fLearningRate <= 0) {
      Log() << kFATAL << "LearningRate <= 0 ... that does not work!" << Endl;
   }
   pLearningRate = Eval(Form("%f", fLearningRate));
   PyDict_SetItemString(fLocalNS, "learningRate", pLearningRate);

   if (fNestimators <= 0) {
      Log() << kFATAL << "NEstimators <= 0 ... that does not work!" << Endl;
   }
   pNestimators = Eval(Form("%i", fNestimators));
   PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);

   if (fMinSamplesSplit < 0) {
      Log() << kFATAL << "MinSamplesSplit < 0 ... that does not work!" << Endl;
   }
   pMinSamplesSplit = Eval(Form("%i", fMinSamplesSplit));
   PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);

   if (fSubsample < 0) {
      Log() << kFATAL << "Subsample < 0 ... that does not work!" << Endl;
   }
   pSubsample = Eval(Form("%f", fSubsample));
   PyDict_SetItemString(fLocalNS, "subsample", pSubsample);

   if (fMinSamplesLeaf < 0) {
      Log() << kFATAL << "MinSamplesLeaf < 0 ... that does not work!" << Endl;
   }
   pMinSamplesLeaf = Eval(Form("%i", fMinSamplesLeaf));
   PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);

   if (fMinWeightFractionLeaf < 0) {
      Log() << kFATAL << "MinWeightFractionLeaf < 0 ... that does not work!" << Endl;
   }
   pMinWeightFractionLeaf = Eval(Form("%f", fMinWeightFractionLeaf));
   PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);

   if (fMaxDepth <= 0) {
      Log() << kFATAL << "MaxDepth <= 0 ... that does not work!" << Endl;
   }
   pMaxDepth = Eval(Form("%i", fMaxDepth));
   PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);

   pInit = Eval(fInit);
   if (!pInit) {
      Log() << kFATAL << Form("Init = %s ... that does not work!", fInit.Data())
            << " The options are None or BaseEstimator, which is an estimator object that"
            << " is used to compute the initial predictions."
            << " 'init' has to provide 'fit' and 'predict' methods."
            << " If None it uses 'loss.init_estimator'." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "init", pInit);

   pRandomState = Eval(fRandomState);
   if (!pRandomState) {
      Log() << kFATAL << Form("RandomState = %s ... that does not work!", fRandomState.Data())
            << " If int, random_state is the seed used by the random number generator;"
            << " If RandomState instance, random_state is the random number generator;"
            << " If None, the random number generator is the RandomState instance used by 'np.random'."
            << Endl;
   }
   PyDict_SetItemString(fLocalNS, "randomState", pRandomState);

   if (fMaxFeatures == "auto" || fMaxFeatures == "sqrt" || fMaxFeatures == "log2") {
      fMaxFeatures = Form("'%s'", fMaxFeatures.Data()); // quote the keyword for python
   }
   pMaxFeatures = Eval(fMaxFeatures);
   PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);

   if (!pMaxFeatures) {
      Log() << kFATAL << Form("MaxFeatures = %s ... that does not work!", fMaxFeatures.Data())
            << " int, float, string or None, optional (default='auto')."
            << " The number of features to consider when looking for the best split:"
            << " If int, then consider `max_features` features at each split."
            << " If float, then `max_features` is a percentage and"
            << " `int(max_features * n_features)` features are considered at each split."
            << " If 'auto', then `max_features=sqrt(n_features)`."
            << " If 'sqrt', then `max_features=sqrt(n_features)`."
            << " If 'log2', then `max_features=log2(n_features)`."
            << " If None, then `max_features=n_features`." << Endl;
   }

   pMaxLeafNodes = Eval(fMaxLeafNodes);
   if (!pMaxLeafNodes) {
      Log() << kFATAL << Form("MaxLeafNodes = %s ... that does not work!", fMaxLeafNodes.Data())
            << " The options are None or integer." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);

   pVerbose = Eval(Form("%i", fVerbose));
   PyDict_SetItemString(fLocalNS, "verbose", pVerbose);

   pWarmStart = Eval(Form("%i", UInt_t(fWarmStart)));
   PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);

   // If no filename is given, set default
   if (fFilenameClassifier.IsNull()) {
      fFilenameClassifier = GetWeightFileDir() + "/PyGTBModel_" + GetName() + ".PyData";
   }
}
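// Note on the pattern above: Eval() evaluates each option string as a Python
// expression, so "None" becomes Python None, "'sqrt'" a Python string, and
// "0.100000" a float; the resulting objects are bound by name into fLocalNS and
// referenced later in the PyRunString() call that constructs the classifier.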

//_______________________________________________________________________
void MethodPyGTB::Init()
{
   TMVA::Internal::PyGILRAII raii;
   _import_array(); // required to use numpy arrays

   // Check options and load them to local python namespace
   ProcessOptions();

   // Import module for gradient tree boosting classifier
   PyRunString("import sklearn.ensemble");

   // Get data properties
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
void MethodPyGTB::Train()
{
   // Load training data (data, classes, weights) to python arrays
   int fNrowsTraining = Data()->GetNTrainingEvents(); // every row is an event, a class type and a weight
   npy_intp dimsData[2];
   dimsData[0] = fNrowsTraining;
   dimsData[1] = fNvars;
   PyArrayObject *fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainData", (PyObject *)fTrainData);
   float *TrainData = (float *)(PyArray_DATA(fTrainData));

   npy_intp dimsClasses = (npy_intp) fNrowsTraining;
   PyArrayObject *fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject *)fTrainDataClasses);
   float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));

   PyArrayObject *fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject *)fTrainDataWeights);
   float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));

   for (int i = 0; i < fNrowsTraining; i++) {
      // Fill training data matrix
      const TMVA::Event *e = Data()->GetTrainingEvent(i);
      for (UInt_t j = 0; j < fNvars; j++) {
         TrainData[j + i * fNvars] = e->GetValue(j);
      }

      // Fill target classes
      TrainDataClasses[i] = e->GetClass();

      // Get event weight
      TrainDataWeights[i] = e->GetWeight();
   }

   // Create classifier object
   PyRunString("classifier = sklearn.ensemble.GradientBoostingClassifier(loss=loss, learning_rate=learningRate, n_estimators=nEstimators, max_depth=maxDepth, min_samples_split=minSamplesSplit, min_samples_leaf=minSamplesLeaf, min_weight_fraction_leaf=minWeightFractionLeaf, subsample=subsample, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, init=init, verbose=verbose, warm_start=warmStart, random_state=randomState)",
               "Failed to setup classifier");

   // Fit classifier
   // NOTE: We dump the output to a variable so that the call does not pollute stdout
   PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");

   // Store classifier
   fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
   if (fClassifier == 0) {
      Log() << kFATAL << "Can't create classifier object from GradientBoostingClassifier" << Endl;
      Log() << Endl;
   }

   if (IsModelPersistence()) {
      Log() << Endl;
      Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
      Log() << Endl;
      Serialize(fFilenameClassifier, fClassifier);
   }
}

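// For orientation (a sketch, not part of the original file): the two
// PyRunString calls above execute Python equivalent to
//
//    classifier = sklearn.ensemble.GradientBoostingClassifier(
//        loss=loss, learning_rate=learningRate, n_estimators=nEstimators, ...)
//    dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)
//
// where the right-hand-side names are the numpy arrays and option objects
// booked into fLocalNS above.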
//_______________________________________________________________________
void MethodPyGTB::TestClassification()
{
   MethodBase::TestClassification();
}

//_______________________________________________________________________
std::vector<Double_t> MethodPyGTB::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Determine number of events
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   // use timer
   Timer timer(nEvents, GetName(), kTRUE);

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ", DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;

   // Get data
   npy_intp dims[2];
   dims[0] = nEvents;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));

   for (Int_t ievt = 0; ievt < nEvents; ievt++) {
      Data()->SetCurrentEvent(ievt);
      const TMVA::Event *e = Data()->GetEvent();
      for (UInt_t i = 0; i < fNvars; i++) {
         pValue[ievt * fNvars + i] = e->GetValue(i);
      }
   }

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return signal probabilities
   if (Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
   for (int i = 0; i < nEvents; ++i) {
      mvaValues[i] = proba[2 * i + TMVA::Types::kSignal]; // getting signal probability
   }

   Py_DECREF(pEvent);
   Py_DECREF(result);

   if (logProgress) {
      Log() << kINFO
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << " " << Endl;
   }

   return mvaValues;
}

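// Layout note: predict_proba returns an (nEvents x nClasses) array, flattened
// row by row, so in the binary case event i occupies entries [2*i, 2*i+1]; the
// loop above picks the Types::kSignal column of each row, consistent with the
// single-event GetMvaValue() below.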
//_______________________________________________________________________
Double_t MethodPyGTB::GetMvaValue(Double_t *errLower, Double_t *errUpper)
{
   // cannot determine error
   NoErrorCalc(errLower, errUpper);

   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA value
   Double_t mvaValue = proba[TMVA::Types::kSignal]; // getting signal probability

   Py_DECREF(result);
   Py_DECREF(pEvent);

   return mvaValue;
}

//_______________________________________________________________________
std::vector<Float_t> &MethodPyGTB::GetMulticlassValues()
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA values
   if (UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
   for (UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];

   Py_DECREF(pEvent);
   Py_DECREF(result);

   return classValues;
}

//_______________________________________________________________________
void MethodPyGTB::ReadModelFromFile()
{
   if (!PyIsInitialized()) {
      PyInitialize();
   }

   Log() << Endl;
   Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
   Log() << Endl;

   // Load classifier from file
   Int_t err = UnSerialize(fFilenameClassifier, &fClassifier);
   if (err != 0) {
      Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
   }

   // Book classifier object in python dict
   PyDict_SetItemString(fLocalNS, "classifier", fClassifier);

   // Load data properties
   // NOTE: This has to be repeated here for the reader application
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
const Ranking *MethodPyGTB::CreateRanking()
{
   // Get feature importance from classifier as an array with length equal to the
   // number of variables; a higher value signals a higher importance
   PyArrayObject *pRanking = (PyArrayObject *)PyObject_GetAttrString(fClassifier, "feature_importances_");
   if (pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;

   // Fill ranking object and return it
   fRanking = new Ranking(GetName(), "Variable Importance");
   Double_t *rankingData = (Double_t *)PyArray_DATA(pRanking);
   for (UInt_t iVar = 0; iVar < fNvars; iVar++) {
      fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
   }

   Py_DECREF(pRanking);

   return fRanking;
}

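// Note: feature_importances_ is the standard scikit-learn attribute holding the
// normalized, impurity-based importance of each input feature; CreateRanking()
// simply relabels those values with the TMVA input variable names.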
//_______________________________________________________________________
void MethodPyGTB::GetHelpMessage() const
{
   // typical length of text line:
   //       "|--------------------------------------------------------------|"
   Log() << "A gradient tree boosting classifier builds a model from an ensemble" << Endl;
   Log() << "of decision trees, which are adapted at each boosting step to better" << Endl;
   Log() << "fit previously misclassified events." << Endl;
   Log() << Endl;
   Log() << "Check out the scikit-learn documentation for more information." << Endl;
}
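// End-to-end usage sketch (hypothetical macro and file names, shown only to
// illustrate where MethodPyGTB plugs into a TMVA workflow):
//
//    TFile *out = TFile::Open("TMVA_PyGTB.root", "RECREATE");
//    TMVA::Factory factory("TMVAClassification", out,
//                          "!V:AnalysisType=Classification");
//    TMVA::DataLoader loader("dataset");
//    loader.AddVariable("var1", 'F');
//    loader.AddSignalTree(sigTree);          // sigTree/bkgTree: user TTrees
//    loader.AddBackgroundTree(bkgTree);
//    loader.PrepareTrainingAndTestTree("", "SplitMode=Random");
//    factory.BookMethod(&loader, TMVA::Types::kPyGTB, "PyGTB",
//                       "NEstimators=100:LearningRate=0.1:MaxDepth=3");
//    factory.TrainAllMethods();
//    factory.TestAllMethods();
//    factory.EvaluateAllMethods();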