MethodPyKeras.cxx
1// @(#)root/tmva/pymva $Id$
2// Author: Stefan Wunsch, 2016
3
4#include <Python.h>
5#include "TMVA/MethodPyKeras.h"
6
7#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
8#include <numpy/arrayobject.h>
9
10#include "TMVA/Types.h"
11#include "TMVA/Config.h"
13#include "TMVA/Results.h"
16#include "TMVA/Tools.h"
17#include "TMVA/Timer.h"
18
19using namespace TMVA;
20
21namespace TMVA {
22namespace Internal {
23class PyGILRAII {
24 PyGILState_STATE m_GILState;
25
26public:
27 PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
28 ~PyGILRAII() { PyGILState_Release(m_GILState); }
29};
30} // namespace Internal
31} // namespace TMVA
32
33REGISTER_METHOD(PyKeras)
34
35ClassImp(MethodPyKeras);
36
37MethodPyKeras::MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption)
38 : PyMethodBase(jobName, Types::kPyKeras, methodTitle, dsi, theOption) {
39 fNumEpochs = 10;
40 fBatchSize = 100;
41 fVerbose = 1;
42 fContinueTraining = false;
43 fSaveBestOnly = true;
44 fTriesEarlyStopping = -1; // negative value deactivates early stopping
45 fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
46 fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
47 fTensorBoard = ""; // empty string deactivates TensorBoard callback
48}
49
50MethodPyKeras::MethodPyKeras(DataSetInfo &theData, const TString &theWeightFile)
51 : PyMethodBase(Types::kPyKeras, theData, theWeightFile) {
52 fNumEpochs = 10;
53 fNumThreads = 0;
54 fBatchSize = 100;
55 fVerbose = 1;
56 fContinueTraining = false;
57 fSaveBestOnly = true;
58 fTriesEarlyStopping = -1; // negative value deactivates early stopping
59 fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
60 fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
61 fTensorBoard = ""; // empty string deactivates TensorBoard callback
62}
63
64MethodPyKeras::~MethodPyKeras() {
65}
66
67Bool_t MethodPyKeras::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) {
68 if (type == Types::kRegression) return kTRUE;
69 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
70 if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
71 return kFALSE;
72}
73
74///////////////////////////////////////////////////////////////////////////////
75
76void MethodPyKeras::DeclareOptions() {
77 DeclareOptionRef(fFilenameModel, "FilenameModel", "Filename of the initial Keras model");
78 DeclareOptionRef(fFilenameTrainedModel, "FilenameTrainedModel", "Filename of the trained output Keras model");
79 DeclareOptionRef(fBatchSize, "BatchSize", "Training batch size");
80 DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");
81 DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)");
82 DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth");
83 DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training");
84 DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");
85 DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");
86 DeclareOptionRef(fTriesEarlyStopping, "TriesEarlyStopping", "Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
87 DeclareOptionRef(fLearningRateSchedule, "LearningRateSchedule", "Set new learning rate during training at specific epochs, e.g., \"50,0.01;70,0.005\"");
88 DeclareOptionRef(fTensorBoard, "TensorBoard",
89 "Write a log during training to visualize and monitor the training performance with TensorBoard");
92
93 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
94 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
95 "Specify as 100 to use exactly 100 events. (Default: 20%)");
96
97}
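// Editor's note -- illustrative sketch, not part of MethodPyKeras.cxx: the options
// declared above are passed as a "Name=Value" option string when booking the method.
// Assumes a TMVA::Factory and TMVA::DataLoader set up as in the standard TMVA
// tutorials; "model.h5" and the numeric values are placeholders.
/*
void BookPyKerasExample(TMVA::Factory *factory, TMVA::DataLoader *dataloader) {
   factory->BookMethod(dataloader, TMVA::Types::kPyKeras, "PyKeras",
                       "FilenameModel=model.h5:NumEpochs=20:BatchSize=32:"
                       "SaveBestOnly=true:TriesEarlyStopping=10:"
                       "LearningRateSchedule=10,0.01;20,0.005:ValidationSize=20%");
}
*/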
98
99
100////////////////////////////////////////////////////////////////////////////////
101/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
102/// 100 etc.
103/// - 20% and 0.2 selects 20% of the training set as validation data.
104/// - 100 selects 100 events as the validation data.
105///
106/// @return number of samples in validation set
107///
108UInt_t MethodPyKeras::GetNumValidationSamples()
109{
110 Int_t nValidationSamples = 0;
111 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
112
113 // Parsing + Validation
114 // --------------------
115 if (fNumValidationString.EndsWith("%")) {
116 // Relative spec. format 20%
117 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
118
119 if (intValStr.IsFloat()) {
120 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
121 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
122 } else {
123 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
124 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
125 }
126 } else if (fNumValidationString.IsFloat()) {
127 Double_t valSizeAsDouble = fNumValidationString.Atof();
128
129 if (valSizeAsDouble < 1.0) {
130 // Relative spec. format 0.2
131 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
132 } else {
133 // Absolute spec format 100 or 100.0
134 nValidationSamples = valSizeAsDouble;
135 }
136 } else {
137 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
138 << Endl;
139 }
140
141 // Value validation
142 // ----------------
143 if (nValidationSamples < 0) {
144 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
145 }
146
147 if (nValidationSamples == 0) {
148 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
149 }
150
151 if (nValidationSamples >= (Int_t)trainingSetSize) {
152 Log() << kFATAL << "Validation size \"" << fNumValidationString
153 << "\" is larger than or equal to the training set size (size=\"" << trainingSetSize << "\")." << Endl;
154 }
155
156 return nValidationSamples;
157}
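// Editor's note -- worked example of the parsing above, assuming a training set of
// 1000 events (the number is a placeholder):
//   ValidationSize=20%  ->  1000 * 0.20 = 200 validation events
//   ValidationSize=0.2  ->  1000 * 0.20 = 200 validation events
//   ValidationSize=100  ->  100 validation events
// Values that parse to zero, to a negative number, or to at least the training-set
// size are rejected with the kFATAL messages above.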
158
159void MethodPyKeras::ProcessOptions() {
160 // Set default filename for trained model if option is not used
161 if (fFilenameTrainedModel.IsNull()) {
162 fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5";
163 }
164
165 // set here some specific options for Tensorflow backend
166 // - when using tensorflow gpu set option to allow memory growth to avoid allocating all memory
167 // - set up number of threads for CPU if NumThreads option was specified
168
169 // check first if using tensorflow backend
170 if (GetKerasBackend() == kTensorFlow) {
171 Log() << kINFO << "Using TensorFlow backend - setting special configuration options " << Endl;
172 PyRunString("import tensorflow as tf");
173 PyRunString("from keras.backend import tensorflow_backend as K");
174
175 // check tensorflow version
176 PyRunString("tf_major_version = int(tf.__version__.split('.')[0])");
177 //PyRunString("print(tf.__version__,'major is ',tf_major_version)");
178 PyObject *pyTfVersion = PyDict_GetItemString(fLocalNS, "tf_major_version");
179 int tfVersion = PyLong_AsLong(pyTfVersion);
180 Log() << kINFO << "Using Tensorflow version " << tfVersion << Endl;
181
182 // use different naming in tf2 for ConfigProto and Session
183 TString configProto = (tfVersion >= 2) ? "tf.compat.v1.ConfigProto" : "tf.ConfigProto";
184 TString session = (tfVersion >= 2) ? "tf.compat.v1.Session" : "tf.Session";
185
186 // set the number of threads if the NumThreads option was specified
187 int num_threads = fNumThreads;
188 if (num_threads > 0) {
189 Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl;
190
191 PyRunString(TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
192 configProto.Data(), num_threads,num_threads));
193 }
194 else
195 PyRunString(TString::Format("session_conf = %s()",configProto.Data()));
196
197 // applying GPU options such as allow_growth=True to avoid allocating all memory on GPU
198 // that would prevent running the TMVA GPU methods later
199 // Also new Nvidia RTX cards (e.g. RTX 2070) require this option
200 if (!fGpuOptions.IsNull() ) {
201 TObjArray * optlist = fGpuOptions.Tokenize(",");
202 for (int item = 0; item < optlist->GetEntries(); ++item) {
203 Log() << kINFO << "Applying GPU option: gpu_options." << optlist->At(item)->GetName() << Endl;
204 PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName()));
205 }
206 }
207 PyRunString(TString::Format("sess = %s(config=session_conf)", session.Data()));
208
209 if (tfVersion < 2) {
210 PyRunString("K.set_session(sess)");
211 } else {
212 PyRunString("tf.compat.v1.keras.backend.set_session(sess)");
213 }
214 }
215 else {
216 if (fNumThreads > 0)
217 Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend" << Endl;
218 if (!fGpuOptions.IsNull() ) {
219 Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions << " when not using tensorflow as backend" << Endl;
220 }
221 }
222
223 // Setup model, either the initial model from `fFilenameModel` or
224 // the trained model from `fFilenameTrainedModel`
225 SetupKerasModel(fContinueTraining);
226 if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
227}
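// Editor's note -- illustrative sketch of what the TensorFlow branch above effectively
// runs in the embedded Python interpreter, assuming a TensorFlow 2.x backend with the
// (placeholder) settings NumThreads=4 and GpuOptions=allow_growth=True:
/*
   session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=4,
                                           inter_op_parallelism_threads=4)
   session_conf.gpu_options.allow_growth=True
   sess = tf.compat.v1.Session(config=session_conf)
   tf.compat.v1.keras.backend.set_session(sess)
*/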
228
229void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
230 /*
231 * Load Keras model from file
232 */
233
234 // Load initial model or already trained model
235 TString filenameLoadModel;
236 if (loadTrainedModel) {
237 filenameLoadModel = fFilenameTrainedModel;
238 }
239 else {
240 filenameLoadModel = fFilenameModel;
241 }
242 PyRunString("model = keras.models.load_model('"+filenameLoadModel+"')",
243 "Failed to load Keras model from file: "+filenameLoadModel);
244 Log() << kINFO << "Load model from file: " << filenameLoadModel << Endl;
245
246
247 /*
248 * Init variables and weights
249 */
250
251 // Get variables, classes and target numbers
252 fNVars = GetNVariables();
253 if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) fNOutputs = DataInfo().GetNClasses();
254 else if (GetAnalysisType() == Types::kRegression) fNOutputs = DataInfo().GetNTargets();
255 else Log() << kFATAL << "Selected analysis type is not implemented" << Endl;
256
257 // Init evaluation (needed for getMvaValue)
258 fVals = new float[fNVars]; // holds values used for classification and regression
259 npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)fNVars};
260 PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (void*)fVals);
261 PyDict_SetItemString(fLocalNS, "vals", (PyObject*)pVals);
262
263 fOutput.resize(fNOutputs); // holds classification probabilities or regression output
264 npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)fNOutputs};
265 PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (void*)&fOutput[0]);
266 PyDict_SetItemString(fLocalNS, "output", (PyObject*)pOutput);
267
268 // Mark the model as setup
269 fModelIsSetup = true;
270}
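// Editor's note -- the "vals" and "output" numpy arrays created above share their
// memory with fVals and fOutput (PyArray_SimpleNewFromData wraps the existing C++
// buffers instead of copying them), so the evaluation methods below only need to fill
// fVals on the C++ side and run
//   for i,p in enumerate(model.predict(vals)): output[i]=p
// in Python for the predictions to appear directly in fOutput.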
271
272void MethodPyKeras::Init() {
273
274 TMVA::Internal::PyGILRAII raii;
275
276 if (!PyIsInitialized()) {
277 Log() << kFATAL << "Python is not initialized" << Endl;
278 }
279 _import_array(); // required to use numpy arrays
280
281 // Import Keras
282 // NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks
283 PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");
284 PyRunString("import keras", "Import Keras failed");
285
286 // Set flag that model is not setup
287 fModelIsSetup = false;
288}
289
290void MethodPyKeras::Train() {
291 if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;
292
293 /*
294 * Load training data to numpy array
295 */
296
297 UInt_t nAllEvents = Data()->GetNTrainingEvents();
298 UInt_t nValEvents = GetNumValidationSamples();
299 UInt_t nTrainingEvents = nAllEvents - nValEvents;
300
301 Log() << kINFO << "Split TMVA training data in " << nTrainingEvents << " training events and "
302 << nValEvents << " validation events" << Endl;
303
304 float* trainDataX = new float[nTrainingEvents*fNVars];
305 float* trainDataY = new float[nTrainingEvents*fNOutputs];
306 float* trainDataWeights = new float[nTrainingEvents];
307 for (UInt_t i=0; i<nTrainingEvents; i++) {
308 const TMVA::Event* e = GetTrainingEvent(i);
309 // Fill variables
310 for (UInt_t j=0; j<fNVars; j++) {
311 trainDataX[j + i*fNVars] = e->GetValue(j);
312 }
313 // Fill targets
314 // NOTE: For classification, convert class number in one-hot vector,
315 // e.g., 1 -> [0, 1] or 0 -> [1, 0] for binary classification
316 if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {
317 for (UInt_t j=0; j<fNOutputs; j++) {
318 trainDataY[j + i*fNOutputs] = 0;
319 }
320 trainDataY[e->GetClass() + i*fNOutputs] = 1;
321 }
322 else if (GetAnalysisType() == Types::kRegression) {
323 for (UInt_t j=0; j<fNOutputs; j++) {
324 trainDataY[j + i*fNOutputs] = e->GetTarget(j);
325 }
326 }
327 else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
328 // Fill weights
329 // NOTE: If no weight branch is given, this defaults to ones for all events
330 trainDataWeights[i] = e->GetWeight();
331 }
332
333 npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNVars};
334 npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNOutputs};
335 npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};
336 PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (void*)trainDataX);
337 PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (void*)trainDataY);
338 PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (void*)trainDataWeights);
339 PyDict_SetItemString(fLocalNS, "trainX", (PyObject*)pTrainDataX);
340 PyDict_SetItemString(fLocalNS, "trainY", (PyObject*)pTrainDataY);
341 PyDict_SetItemString(fLocalNS, "trainWeights", (PyObject*)pTrainDataWeights);
342
343 /*
344 * Load validation data to numpy array
345 */
346
347 // NOTE: from TMVA we get the validation data as a subset of the training data;
348 // the test data are not used for validation, they are kept for the final testing
349
350
351 float* valDataX = new float[nValEvents*fNVars];
352 float* valDataY = new float[nValEvents*fNOutputs];
353 float* valDataWeights = new float[nValEvents];
354 // validation events follow the training ones in the TMVA training vector
355 for (UInt_t i=0; i< nValEvents ; i++) {
356 UInt_t ievt = nTrainingEvents + i; // TMVA event index
357 const TMVA::Event* e = GetTrainingEvent(ievt);
358 // Fill variables
359 for (UInt_t j=0; j<fNVars; j++) {
360 valDataX[j + i*fNVars] = e->GetValue(j);
361 }
362 // Fill targets
363 if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {
364 for (UInt_t j=0; j<fNOutputs; j++) {
365 valDataY[j + i*fNOutputs] = 0;
366 }
367 valDataY[e->GetClass() + i*fNOutputs] = 1;
368 }
369 else if (GetAnalysisType() == Types::kRegression) {
370 for (UInt_t j=0; j<fNOutputs; j++) {
371 valDataY[j + i*fNOutputs] = e->GetTarget(j);
372 }
373 }
374 else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
375 // Fill weights
376 valDataWeights[i] = e->GetWeight();
377 }
378
379 npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)fNVars};
380 npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)fNOutputs};
381 npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};
382 PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (void*)valDataX);
383 PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (void*)valDataY);
384 PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (void*)valDataWeights);
385 PyDict_SetItemString(fLocalNS, "valX", (PyObject*)pValDataX);
386 PyDict_SetItemString(fLocalNS, "valY", (PyObject*)pValDataY);
387 PyDict_SetItemString(fLocalNS, "valWeights", (PyObject*)pValDataWeights);
388
389 /*
390 * Train Keras model
391 */
392 Log() << kINFO << "Training Model Summary" << Endl;
393 PyRunString("model.summary()");
394
395 // Setup parameters
396
397 PyObject* pBatchSize = PyLong_FromLong(fBatchSize);
398 PyObject* pNumEpochs = PyLong_FromLong(fNumEpochs);
399 PyObject* pVerbose = PyLong_FromLong(fVerbose);
400 PyDict_SetItemString(fLocalNS, "batchSize", pBatchSize);
401 PyDict_SetItemString(fLocalNS, "numEpochs", pNumEpochs);
402 PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
403
404 // Setup training callbacks
405 PyRunString("callbacks = []");
406
407 // Callback: Save only weights with smallest validation loss
408 if (fSaveBestOnly) {
409 PyRunString("callbacks.append(keras.callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
410 Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;
411 }
412
413 // Callback: Stop training early if no improvement in validation loss is observed
414 if (fTriesEarlyStopping>=0) {
415 TString tries;
416 tries.Form("%i", fTriesEarlyStopping);
417 PyRunString("callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
418 Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " epochs with no improvement of the validation loss" << Endl;
419 }
420
421 // Callback: Learning rate scheduler
422 if (fLearningRateSchedule!="") {
423 // Setup a python dictionary with the desired learning rate steps
424 PyRunString("strScheduleSteps = '"+fLearningRateSchedule+"'\n"
425 "schedulerSteps = {}\n"
426 "for c in strScheduleSteps.split(';'):\n"
427 " x = c.split(',')\n"
428 " schedulerSteps[int(x[0])] = float(x[1])\n",
429 "Failed to setup steps for scheduler function from string: "+fLearningRateSchedule,
430 Py_file_input);
431 // Set scheduler function as piecewise function with given steps
432 PyRunString("def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n"
433 " if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n"
434 " else: return float(model.optimizer.lr.get_value())\n",
435 "Failed to setup scheduler function with string: "+fLearningRateSchedule,
436 Py_file_input);
437 // Setup callback
438 PyRunString("callbacks.append(keras.callbacks.LearningRateScheduler(schedule))",
439 "Failed to setup training callback: LearningRateSchedule");
440 Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;
441 }
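// Editor's note -- worked example of the schedule parsing above: the option value
// "50,0.01;70,0.005" is split into schedulerSteps = {50: 0.01, 70: 0.005}, so the
// LearningRateScheduler callback switches the optimizer learning rate to 0.01 at
// epoch 50 and to 0.005 at epoch 70, and keeps the current rate at all other epochs.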
442
443 // Callback: TensorBoard
444 if (fTensorBoard != "") {
445 TString logdir = TString("'") + fTensorBoard + TString("'");
446 PyRunString(
447 "callbacks.append(keras.callbacks.TensorBoard(log_dir=" + logdir +
448 ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
449 "Failed to setup training callback: TensorBoard");
450 Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;
451 }
452
453 // Train model
454 PyRunString("history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
455 "Failed to train model");
456
457
458 std::vector<float> fHistory; // Hold training history (val_acc or loss etc)
459 fHistory.resize(fNumEpochs); // holds training loss or accuracy output
460 npy_intp dimsHistory[1] = { (npy_intp)fNumEpochs};
461 PyArrayObject* pHistory = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsHistory, NPY_FLOAT, (void*)&fHistory[0]);
462 PyDict_SetItemString(fLocalNS, "HistoryOutput", (PyObject*)pHistory);
463
464 // Store training history data
465 Int_t iHis=0;
466 PyRunString("number_of_keys=len(history.history.keys())");
467 PyObject* PyNkeys=PyDict_GetItemString(fLocalNS, "number_of_keys");
468 int nkeys=PyLong_AsLong(PyNkeys);
469 for (iHis=0; iHis<nkeys; iHis++) {
470
471 PyRunString(TString::Format("copy_string=str(list(history.history.keys())[%d])",iHis));
472 //PyRunString("print (copy_string)");
473 PyObject* stra=PyDict_GetItemString(fLocalNS, "copy_string");
474 if(!stra) break;
475#if PY_MAJOR_VERSION < 3 // for Python2
476 const char *stra_name = PyBytes_AsString(stra);
477 // need to add string delimiter for Python2
478 TString sname = TString::Format("'%s'",stra_name);
479 const char * name = sname.Data();
480#else // for Python3
481 PyObject* repr = PyObject_Repr(stra);
482 PyObject* str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~");
483 const char *name = PyBytes_AsString(str);
484#endif
485
486 Log() << kINFO << "Getting training history for item:" << iHis << " name = " << name << Endl;
487 PyRunString(TString::Format("for i,p in enumerate(history.history[%s]):\n HistoryOutput[i]=p\n",name),
488 TString::Format("Failed to get %s from training history",name));
489 for (size_t i=0; i<fHistory.size(); i++)
490 fTrainHistory.AddValue(name,i+1,fHistory[i]);
491
492 }
493//#endif
494
495 /*
496 * Store trained model to file (only if option 'SaveBestOnly' is NOT activated,
497 * because we do not want to override the best model checkpoint)
498 */
499
500 if (!fSaveBestOnly) {
501 PyRunString("model.save('"+fFilenameTrainedModel+"', overwrite=True)",
502 "Failed to save trained model: "+fFilenameTrainedModel);
503 Log() << kINFO << "Trained model written to file: " << fFilenameTrainedModel << Endl;
504 }
505
506 /*
507 * Clean-up
508 */
509
510 delete[] trainDataX;
511 delete[] trainDataY;
512 delete[] trainDataWeights;
513 delete[] valDataX;
514 delete[] valDataY;
515 delete[] valDataWeights;
516}
517
518void MethodPyKeras::TestClassification() {
519 MethodBase::TestClassification();
520}
521
522Double_t MethodPyKeras::GetMvaValue(Double_t *errLower, Double_t *errUpper) {
523 // Cannot determine error
524 NoErrorCalc(errLower, errUpper);
525
526 // Check whether the model is setup
527 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
528 if (!fModelIsSetup) {
529 // Setup the trained model
530 SetupKerasModel(true);
531 }
532
533 // Get signal probability (called mvaValue here)
534 const TMVA::Event* e = GetEvent();
535 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
536 PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
537 "Failed to get predictions");
538
539 return fOutput[TMVA::Types::kSignal];
540}
541
542std::vector<Double_t> MethodPyKeras::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) {
543 // Check whether the model is setup
544 // NOTE: Unfortunately this is needed because during evaluation ProcessOptions is not called again
545 if (!fModelIsSetup) {
546 // Setup the trained model
547 SetupKerasModel(true);
548 }
549
550 // Load data to numpy array
551 Long64_t nEvents = Data()->GetNEvents();
552 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
553 if (firstEvt < 0) firstEvt = 0;
554 nEvents = lastEvt-firstEvt;
555
556 // use timer
557 Timer timer( nEvents, GetName(), kTRUE );
558
559 if (logProgress)
560 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
561 << "Evaluation of " << GetMethodName() << " on "
562 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
563 << " sample (" << nEvents << " events)" << Endl;
564
565 float* data = new float[nEvents*fNVars];
566 for (UInt_t i=0; i<nEvents; i++) {
567 Data()->SetCurrentEvent(i);
568 const TMVA::Event *e = GetEvent();
569 for (UInt_t j=0; j<fNVars; j++) {
570 data[j + i*fNVars] = e->GetValue(j);
571 }
572 }
573
574 npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)fNVars};
575 PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (void*)data);
576 if (pDataMvaValues==0) Log() << "Failed to load data to Python array" << Endl;
577
578 // Get prediction for all events
579 PyObject* pModel = PyDict_GetItemString(fLocalNS, "model");
580 if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;
581 PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallMethod(pModel, (char*)"predict", (char*)"O", pDataMvaValues);
582 if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;
583 delete[] data;
584
585 // Load predictions to double vector
586 // NOTE: The signal probability is given at the output
587 std::vector<double> mvaValues(nEvents);
588 float* predictionsData = (float*) PyArray_DATA(pPredictions);
589 for (UInt_t i=0; i<nEvents; i++) {
590 mvaValues[i] = (double) predictionsData[i*fNOutputs + TMVA::Types::kSignal];
591 }
592
593 if (logProgress) {
594 Log() << kINFO
595 << "Elapsed time for evaluation of " << nEvents << " events: "
596 << timer.GetElapsedTime() << " " << Endl;
597 }
598
599
600 return mvaValues;
601}
602
603std::vector<Float_t>& MethodPyKeras::GetRegressionValues() {
604 // Check whether the model is setup
605 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
606 if (!fModelIsSetup){
607 // Setup the model and load weights
608 SetupKerasModel(true);
609 }
610
611 // Get regression values
612 const TMVA::Event* e = GetEvent();
613 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
614 PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
615 "Failed to get predictions");
616
617 // Use inverse transformation of targets to get final regression values
618 Event * eTrans = new Event(*e);
619 for (UInt_t i=0; i<fNOutputs; ++i) {
620 eTrans->SetTarget(i,fOutput[i]);
621 }
622
623 const Event* eTrans2 = GetTransformationHandler().InverseTransform(eTrans);
624 for (UInt_t i=0; i<fNOutputs; ++i) {
625 fOutput[i] = eTrans2->GetTarget(i);
626 }
627
628 return fOutput;
629}
630
631std::vector<Float_t>& MethodPyKeras::GetMulticlassValues() {
632 // Check whether the model is setup
633 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
634 if (!fModelIsSetup){
635 // Setup the model and load weights
636 SetupKerasModel(true);
637 }
638
639 // Get class probabilities
640 const TMVA::Event* e = GetEvent();
641 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
642 PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
643 "Failed to get predictions");
644
645 return fOutput;
646}
647
648void MethodPyKeras::ReadModelFromFile() {
649}
650
651void MethodPyKeras::GetHelpMessage() const {
652// typical length of text line:
653// "|--------------------------------------------------------------|"
654 Log() << Endl;
655 Log() << "Keras is a high-level API for the Theano and TensorFlow packages." << Endl;
656 Log() << "This method wraps the training and prediction steps of the Keras" << Endl;
657 Log() << "Python package for TMVA, so that data loading, preprocessing and" << Endl;
658 Log() << "evaluation can be done within the TMVA system. To use this Keras" << Endl;
659 Log() << "interface, you have to generate a model with Keras first. Then," << Endl;
660 Log() << "this model can be loaded and trained in TMVA." << Endl;
661 Log() << Endl;
662}
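// Editor's note -- illustrative end-to-end sketch, not part of MethodPyKeras.cxx,
// assuming a Keras model was already defined in Python and saved with
// model.save('model.h5'); outputFile and the dataloader setup are placeholders:
/*
   TMVA::PyMethodBase::PyInitialize();   // start the embedded Python interpreter
   auto factory    = new TMVA::Factory("TMVAClassification", outputFile,
                                       "!V:AnalysisType=Classification");
   auto dataloader = new TMVA::DataLoader("dataset");
   // ... dataloader->AddVariable(...), dataloader->AddSignalTree(...), etc. ...
   factory->BookMethod(dataloader, TMVA::Types::kPyKeras, "PyKeras",
                       "FilenameModel=model.h5:NumEpochs=20:BatchSize=32");
   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();
*/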
663
664MethodPyKeras::EBackendType MethodPyKeras::GetKerasBackend() {
665 // get the keras backend
666 // check first if using tensorflow backend
667 PyRunString("keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
668 PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
669 if (keras_backend != nullptr && keras_backend == Py_True)
670 return kTensorFlow;
671
672 PyRunString("keras_backend_is_set = keras.backend.backend() == \"theano\"");
673 keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
674 if (keras_backend != nullptr && keras_backend == Py_True)
675 return kTheano;
676
677 PyRunString("keras_backend_is_set = keras.backend.backend() == \"cntk\"");
678 keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
679 if (keras_backend != nullptr && keras_backend == Py_True)
680 return kCNTK;
681
682 return kUndefined;
683}
684
685TString MethodPyKeras::GetKerasBackendName() {
686 // get the keras backend name
687 EBackendType type = GetKerasBackend();
688 if (type == kTensorFlow) return "TensorFlow";
689 if (type == kTheano) return "Theano";
690 if (type == kCNTK) return "CNTK";
691 return "Undefined";
692}