23 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION    24 #include <numpy/arrayobject.h>    57                                    const TString &methodTitle,
    59                                    const TString &theOption) :
    61    fBaseEstimator(
"None"),
    64    fAlgorithm(
"SAMME.R"),
    71                                    const TString &theWeightFile) :
   100       The base estimator from which the boosted ensemble is built.\   101       Support for sample weighting is required, as well as proper `classes_`\   102       and `n_classes_` attributes.");
   105       The maximum number of estimators at which boosting is terminated.\   106       In case of perfect fit, the learning procedure is stopped early.");
   109       Learning rate shrinks the contribution of each classifier by\   110       ``learning_rate``. There is a trade-off between ``learning_rate`` and\   114       If 'SAMME.R' then use the SAMME.R real boosting algorithm.\   115       ``base_estimator`` must support calculation of class probabilities.\   116       If 'SAMME' then use the SAMME discrete boosting algorithm.\   117       The SAMME.R algorithm typically converges faster than SAMME,\   118       achieving a lower test error with fewer boosting iterations.");
   121       If int, random_state is the seed used by the random number generator;\   122       If RandomState instance, random_state is the random number generator;\   123       If None, the random number generator is the RandomState instance used\   127       "Store trained classifier in this file");
   137             << 
" The options are Object or None." << 
Endl;
   142       Log() << kFATAL << 
"NEstimators <=0 ... that does not work!" << 
Endl;
   148       Log() << kFATAL << 
"LearningRate <=0 ... that does not work!" << 
Endl;
   154       Log() << kFATAL << 
Form(
"Algorithm = %s ... that does not work!", 
fAlgorithm.Data())
   155             << 
" The options are SAMME of SAMME.R." << 
Endl;
   158    PyDict_SetItemString(
fLocalNS, 
"algorithm", pAlgorithm);
   162       Log() << kFATAL << 
Form(
" RandomState = %s... that does not work !! ", 
fRandomState.Data())
   163             << 
"If int, random_state is the seed used by the random number generator;"   164             << 
"If RandomState instance, random_state is the random number generator;"   165             << 
"If None, the random number generator is the RandomState instance used by `np.random`." << 
Endl;
   196    npy_intp dimsData[2];
   197    dimsData[0] = fNrowsTraining;
   199    fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
   201    float *TrainData = (
float *)(PyArray_DATA(fTrainData));
   203    npy_intp dimsClasses = (npy_intp) fNrowsTraining;
   204    fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   206    float *TrainDataClasses = (
float *)(PyArray_DATA(fTrainDataClasses));
   208    fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   210    float *TrainDataWeights = (
float *)(PyArray_DATA(fTrainDataWeights));
   212    for (
int i = 0; i < fNrowsTraining; i++) {
   220       TrainDataClasses[i] = e->
GetClass();
   227    PyRunString(
"classifier = sklearn.ensemble.AdaBoostClassifier(base_estimator=baseEstimator, n_estimators=nEstimators, learning_rate=learningRate, algorithm=algorithm, random_state=randomState)",
   228       "Failed to setup classifier");
   232    PyRunString(
"dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", 
"Failed to train classifier");
   237       Log() << kFATAL << 
"Can't create classifier object from AdaBoostClassifier" << 
Endl;
   263    if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   264    if (firstEvt < 0) firstEvt = 0;
   265    nEvents = lastEvt-firstEvt;
   271    PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   272    float *pValue = (
float *)(PyArray_DATA(pEvent));
   274    for (
Int_t ievt=0; ievt<nEvents; ievt++) {
   278          pValue[ievt * fNvars + i] = e->
GetValue(i);
   283    PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(
fClassifier, const_cast<char *>(
"predict_proba"), 
const_cast<char *
>(
"(O)"), pEvent);
   284    double *proba = (
double *)(PyArray_DATA(result));
   288    for (
int i = 0; i < nEvents; ++i) {
   312    PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   313    float *pValue = (
float *)(PyArray_DATA(pEvent));
   317    PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(
fClassifier, const_cast<char *>(
"predict_proba"), 
const_cast<char *
>(
"(O)"), pEvent);
   318    double *proba = (
double *)(PyArray_DATA(result));
   341    PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   342    float *pValue = (
float *)(PyArray_DATA(pEvent));
   346    PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(
fClassifier, const_cast<char *>(
"predict_proba"), 
const_cast<char *
>(
"(O)"), pEvent);
   347    double *proba = (
double *)(PyArray_DATA(result));
   388    PyArrayObject* pRanking = (PyArrayObject*) PyObject_GetAttrString(
fClassifier, 
"feature_importances_");
   391    if(pRanking == 0) 
return NULL;
   410    Log() << 
"An AdaBoost classifier is a meta-estimator that begins by fitting" << 
Endl;
   411    Log() << 
"a classifier on the original dataset and then fits additional copies" << 
Endl;
   412    Log() << 
"of the classifier on the same dataset but where the weights of incorrectly" << 
Endl;
   413    Log() << 
"classified instances are adjusted such that subsequent classifiers focus" << 
Endl;
   414    Log() << 
"more on difficult cases." << 
Endl;
   416    Log() << 
"Check out the scikit-learn documentation for more information." << 
Endl;
 
void SetCurrentEvent(Long64_t ievt) const
MsgLogger & Endl(MsgLogger &ml)
Singleton class for Global types used by TMVA. 
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Ranking for variables in method (implementation) 
UInt_t GetNClasses() const
virtual void TestClassification()
initialization 
static void Serialize(TString file, PyObject *classifier)
Serialize Python object. 
PyArrayObject * fTrainDataClasses
static int PyIsInitialized()
Check Python interpreter initialization status. 
static void PyInitialize()
Initialize Python interpreter. 
const TString & GetInputLabel(Int_t i) const
const TString & GetWeightFileDir() const
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=Py_single_input)
Execute Python code from string. 
std::vector< Float_t > & GetMulticlassValues()
PyObject * Eval(TString code)
Evaluate Python code. 
DataSetInfo & DataInfo() const
Class that contains all the data information. 
PyArrayObject * fTrainDataWeights
Double_t GetWeight() const
Return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not...
Long64_t GetNTrainingEvents() const
Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)
std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
Get all the MVA values for the events of the current data type.
const Event * GetTrainingEvent(Long64_t ievt) const
const char * GetName() const
char * Form(const char *fmt,...)
void GetHelpMessage() const
PyArrayObject * fTrainData
UInt_t GetNVariables() const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable 
TString fFilenameClassifier
static Int_t UnSerialize(TString file, PyObject **obj)
Unserialize Python object. 
PyObject * pBaseEstimator
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
#define REGISTER_METHOD(CLASS)
for example 
Abstract ClassifierFactory template that handles arbitrary types. 
virtual void AddRank(const Rank &rank)
Add a new rank and take ownership of it.
virtual void DeclareCompatibilityOptions()
Options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
std::vector< Float_t > classValues
virtual void ReadModelFromFile()
std::vector< Double_t > mvaValues
MethodPyAdaBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
virtual void TestClassification()
initialization 
const Event * GetEvent() const
const Ranking * CreateRanking()
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Bool_t IsModelPersistence()