Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVA_CNN_Classification.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_tmva
3## \notebook
4## TMVA Classification Example Using a Convolutional Neural Network
5##
6## This is an example of using a CNN in TMVA. We do classification using a toy image data set
7## that is generated when running the example macro
8##
9## \macro_image
10## \macro_output
11## \macro_code
12##
13## \author Harshal Shende
14
15
16# TMVA Classification Example Using a Convolutional Neural Network
17
18
## Helper function to create the input image data.
## We create signal and background 2D histograms from 2D Gaussians
## with a location (means in X and Y) that is different for each event.
## The difference between signal and background is in the Gaussian widths,
## which differ between the two classes by a few percent.
24
25
import importlib
import importlib.util
import os

import ROOT


TMVA = ROOT.TMVA
TFile = ROOT.TFile
35
37
def MakeImagesTree(n, nh, nw):
    """Generate the toy image data set and write it to a ROOT file.

    For each of the ``n`` events two images are produced by filling a 2D
    histogram (``nh`` x ``nw`` bins over [0, 10] x [0, 10]) from a 2D Gaussian
    ("xygaus") whose means are drawn uniformly in [3, 7]. The two Gaussians
    differ only in their widths: (3, 3) for the one stored in ``bkg_tree`` and
    (3 + delta_sigma, 3 - delta_sigma) for the one stored in ``sig_tree``.
    Gaussian noise is added to every pixel and each image is flattened, with
    index ``k * nw + l``, into the ``std::vector<float>`` branch ``vars``.

    Parameters:
        n: number of images (tree entries) generated for each tree.
        nh: number of histogram bins along the first axis (image height).
        nw: number of histogram bins along the second axis (image width).

    The output file is named ``images_data_<nh>x<nw>.root`` and is recreated
    on every call.
    """
    # image size (nh x nw)
    ntot = nh * nw
    # derive the file name from the image dimensions rather than fixing 16x16
    fileOutName = "images_data_{}x{}.root".format(nh, nw)
    nRndmEvts = 10000  # number of events we use to fill each image
    delta_sigma = 0.1  # absolute shift of the widths (0.1 on a sigma of 3)
    pixelNoise = 5  # sigma of the Gaussian noise added to every pixel

    sX1 = 3
    sY1 = 3
    sX2 = sX1 + delta_sigma
    sY2 = sY1 - delta_sigma
    h1 = ROOT.TH2D("h1", "h1", nh, 0, 10, nw, 0, 10)
    h2 = ROOT.TH2D("h2", "h2", nh, 0, 10, nw, 0, 10)
    f1 = ROOT.TF2("f1", "xygaus")
    f2 = ROOT.TF2("f2", "xygaus")
    sgn = ROOT.TTree("sig_tree", "signal_tree")
    bkg = ROOT.TTree("bkg_tree", "background_tree")

    f = TFile(fileOutName, "RECREATE")
    x1 = ROOT.std.vector["float"](ntot)
    x2 = ROOT.std.vector["float"](ntot)

    # create signal and background trees with a single branch
    # an std::vector<float> of size nh x nw containing the image data
    bkg.Branch("vars", "std::vector<float>", x1)
    sgn.Branch("vars", "std::vector<float>", x2)

    # the trees were created before the file was opened: attach them to it
    sgn.SetDirectory(f)
    bkg.SetDirectory(f)

    f1.SetParameters(1, 5, sX1, 5, sY1)
    f2.SetParameters(1, 5, sX2, 5, sY2)
    ROOT.gRandom.SetSeed(0)
    ROOT.Info("TMVA_CNN_Classification", "Filling ROOT tree \n")
    for i in range(n):
        if i % 1000 == 0:
            print("Generating image event ...", i)

        h1.Reset()
        h2.Reset()
        # generate random means in range [3,7] to be not too much on the border
        f1.SetParameter(1, ROOT.gRandom.Uniform(3, 7))
        f1.SetParameter(3, ROOT.gRandom.Uniform(3, 7))
        f2.SetParameter(1, ROOT.gRandom.Uniform(3, 7))
        f2.SetParameter(3, ROOT.gRandom.Uniform(3, 7))

        h1.FillRandom("f1", nRndmEvts)
        h2.FillRandom("f2", nRndmEvts)

        for k in range(nh):
            for l in range(nw):
                m = k * nw + l
                # add some noise in each bin (histogram bins are numbered from 1)
                x1[m] = h1.GetBinContent(k + 1, l + 1) + ROOT.gRandom.Gaus(0, pixelNoise)
                x2[m] = h2.GetBinContent(k + 1, l + 1) + ROOT.gRandom.Gaus(0, pixelNoise)

        sgn.Fill()
        bkg.Fill()

    sgn.Write()
    bkg.Write()

    # print() does not interpret printf-style arguments: format explicitly
    print("Signal and background tree with images data written to the file %s" % f.GetName())
    sgn.Print()
    bkg.Print()
    f.Close()
105
# Features of the ROOT build in use; they decide which methods can be booked.
rootFeatures = ROOT.gROOT.GetConfigFeatures()
hasGPU = "tmva-gpu" in rootFeatures
hasCPU = "tmva-cpu" in rootFeatures

nevt = 1000  # use a larger value to get better results
# Switches selecting the methods to run, in the order:
# TMVA CNN, Keras CNN, TMVA DNN, TMVA BDT, PyTorch CNN
opt = [1, 1, 1, 1, 1]
useTMVACNN = opt[0] if len(opt) > 0 else False
useKerasCNN = opt[1] if len(opt) > 1 else False
useTMVADNN = opt[2] if len(opt) > 2 else False
useTMVABDT = opt[3] if len(opt) > 3 else False
usePyTorchCNN = opt[4] if len(opt) > 4 else False

if not hasCPU and not hasGPU:
    ROOT.Warning(
        "TMVA_CNN_Classification",
        "ROOT is not supporting tmva-cpu and tmva-gpu skip using TMVA-DNN and TMVA-CNN",
    )
    useTMVACNN = False
    useTMVADNN = False

if "tmva-pymva" not in rootFeatures:
    useKerasCNN = False
    usePyTorchCNN = False
else:
    # initialize the Python interpreter used by the PyMVA methods
    TMVA.PyMethodBase.PyInitialize()

tf_spec = importlib.util.find_spec("tensorflow")
if tf_spec is None:
    useKerasCNN = False
    ROOT.Warning("TMVA_CNN_Classification", "Skip using Keras since tensorflow is not installed")

torch_spec = importlib.util.find_spec("torch")
if torch_spec is None:
    usePyTorchCNN = False
    ROOT.Warning("TMVA_CNN_Classification", "Skip using PyTorch since torch is not installed")

if not useTMVACNN:
    ROOT.Warning(
        "TMVA_CNN_Classification",
        "TMVA is not build with GPU or CPU multi-thread support. Cannot use TMVA Deep Learning for CNN",
    )
143
writeOutputFile = True

num_threads = 4  # upper limit on the number of threads
max_epochs = 10  # upper limit on the number of training epochs


# Turn on ROOT's implicit multi-threading when the build provides it.
if "imt" not in ROOT.gROOT.GetConfigFeatures():
    print("Running in serial mode since ROOT does not support MT")
else:
    ROOT.EnableImplicitMT(num_threads)
    ROOT.gSystem.Setenv("OMP_NUM_THREADS", "1")  # switch OFF MT in OpenBLAS
    print("Running with nthreads = {}".format(ROOT.GetThreadPoolSize()))


# File receiving the training results; stays None when no output is requested.
outputFile = TFile.Open("TMVA_CNN_ClassificationOutput.root", "RECREATE") if writeOutputFile else None
164
165
## Create TMVA Factory

# The Factory is the major TMVA object to interact with; the methods whose
# performance is to be investigated are booked on it later.
# Its arguments are:
#
# - the base of the name of all the output weight files that will be created
#   in the directory weight/ with the method parameters
#
# - the output file for the training results
#
# - options defining some general configuration for the TMVA session, given
#   here as keyword arguments. For example all TMVA output can be suppressed
#   with Silent=True (the keyword form of removing the "!" in front of
#   "Silent" in the option string)
#
# - note that any pre-transformation of the input variables is disabled and
#   that correlations between input variables are not computed

factoryOptions = dict(
    V=False,
    ROC=True,
    Silent=False,
    Color=True,
    AnalysisType="Classification",
    Transformations=None,
    Correlations=False,
)
factory = TMVA.Factory("TMVA_CNN_Classification", outputFile, **factoryOptions)
198
199
## Declare DataLoader(s)

# The next step is to declare the DataLoader class that deals with input variables

# Define the input variables that shall be used for the MVA training
# note that you may also use variable expressions, which can be parsed by TTree::Draw( "expression" )

# In this case the input data consists of an image of 16x16 pixels. All pixels of an image are stored
# in a single std::vector<float> branch ("vars") of a ROOT TTree

loader = TMVA.DataLoader("dataset")


## Setup Dataset(s)

# Define input data file and signal and background trees


imgSize = 16 * 16
inputFileName = "images_data_16x16.root"

# if the input file does not exist create it
if ROOT.gSystem.AccessPathName(inputFileName):
    MakeImagesTree(nevt, 16, 16)

inputFile = TFile.Open(inputFileName)
# a failed TFile.Open yields a null pointer, which is falsy but is not None;
# stop here with a clear message instead of failing later on inputFile.Get
if not inputFile or inputFile.IsZombie():
    raise RuntimeError("Error opening input file {} - exit".format(inputFileName))


# inputFileName = "tmva_class_example.root"


# --- Register the training and test trees

signalTree = inputFile.Get("sig_tree")
backgroundTree = inputFile.Get("bkg_tree")

nEventsSig = signalTree.GetEntries()
nEventsBkg = backgroundTree.GetEntries()

# global event weights per tree (see below for setting event-wise weights)
signalWeight = 1.0
backgroundWeight = 1.0

# You can add an arbitrary number of signal or background trees
loader.AddSignalTree(signalTree, signalWeight)
loader.AddBackgroundTree(backgroundTree, backgroundWeight)

## add event variables (image)
## use new method (from ROOT 6.20 to add a variable array for all image data)
loader.AddVariablesArray("vars", imgSize)
251
# Set individual event weights (the variables must exist in the original TTree)
# for signal    : loader.SetSignalWeightExpression("weight1*weight2")
# for background: loader.SetBackgroundWeightExpression("weight1*weight2")
# loader.SetBackgroundWeightExpression("weight")

# Apply additional cuts on the signal and background samples (can be different)
mycuts = ""  # for example: mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"
mycutb = ""  # for example: mycutb = "abs(var1)<0.5"

# Tell the factory how to use the training and testing events
# If no numbers of events are given, half of the events in the tree are used
# for training, and the other half for testing:
#    loader.PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
# It is possible also to specify the number of training and testing events,
# note we disable the computation of the correlation matrix of the input variables

# use 80% of the events for training; the options expect whole event counts,
# so convert the product to int instead of passing a float such as 800.0
nTrainSig = int(0.8 * nEventsSig)
nTrainBkg = int(0.8 * nEventsBkg)

# build the options for DataLoader::PrepareTrainingAndTestTree

loader.PrepareTrainingAndTestTree(
    mycuts,
    mycutb,
    nTrain_Signal=nTrainSig,
    nTrain_Background=nTrainBkg,
    SplitMode="Random",
    SplitSeed=100,
    NormMode="NumEvents",
    V=False,
    CalcCorrelations=False,
)
284
285
286# DataSetInfo : [dataset] : Added class "Signal"
287# : Add Tree sig_tree of type Signal with 10000 events
288# DataSetInfo : [dataset] : Added class "Background"
289# : Add Tree bkg_tree of type Background with 10000 events
290
291# signalTree.Print();
292
# Booking Methods

# The TMVA methods are booked from here on, starting with a Boosted Decision
# Tree method (BDT).


# Boosted Decision Trees
if useTMVABDT:
    bdtOptions = dict(
        V=False,
        NTrees=400,
        MinNodeSize="2.5%",
        MaxDepth=2,
        BoostType="AdaBoost",
        AdaBoostBeta=0.5,
        UseBaggedBoost=True,
        BaggedSampleFraction=0.5,
        SeparationType="GiniIndex",
        nCuts=20,
    )
    factory.BookMethod(loader, TMVA.Types.kBDT, "BDT", **bdtOptions)
315
316
#### Booking Deep Neural Network

# The DNN of TMVA is booked here. See the example TMVA_Higgs_Classification.C
# for a detailed description of the options.

if useTMVADNN:
    layoutString = ROOT.TString(
        "DENSE|100|RELU,BNORM,DENSE|100|RELU,BNORM,DENSE|100|RELU,BNORM,DENSE|100|RELU,DENSE|1|LINEAR"
    )

    # Training strategy: a comma separated list of settings. Several strategies
    # with different parameters (e.g. learning rates or regularizations) can be
    # chained with the `|` delimiter.
    dnnTrainingSettings = [
        "LearningRate=1e-3",
        "Momentum=0.9",
        "Repetitions=1",
        "ConvergenceSteps=5",
        "BatchSize=100",
        "TestRepetitions=1",
        "WeightDecay=1e-4",
        "Regularization=None",
        "Optimizer=ADAM",
        "DropConfig=0.0+0.0+0.0+0.",
        "MaxEpochs=" + str(max_epochs),
    ]
    trainingString1 = ROOT.TString(",".join(dnnTrainingSettings))

    # Pick the architecture and the matching method name: GPU if available,
    # CPU otherwise.
    if hasGPU:
        dnnOptions = "GPU"
        dnnMethodName = "TMVA_DNN_GPU"
    else:
        dnnOptions = "CPU"
        dnnMethodName = "TMVA_DNN_CPU"

    dnnBookingOptions = dict(
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform=None,
        WeightInitialization="XAVIER",
        Layout=layoutString,
        TrainingStrategy=trainingString1,
        Architecture=dnnOptions,
    )
    factory.BookMethod(loader, TMVA.Types.kDL, dnnMethodName, **dnnBookingOptions)
360
361
### Book Convolutional Neural Network in TMVA

# To build a CNN one needs to define
#
# - Input Layout : number of channels (in this case = 1) | image height | image width
# - Batch Layout : batch size | number of channels | image size = (height*width)
#
# Then one adds Convolutional layers and MaxPool layers.
#
# - For a Convolutional layer the option string has to be:
#    - CONV | number of units | filter height | filter width | stride height | stride width |
#      padding height | padding width | activation function
#
#    - note that here a 3x3 filter with padding=1 and stride=1 is used, so the output dimension
#      of the conv layer is equal to the input
#
#    - note that a batch normalization layer follows the first convolutional layer. This seems
#      to help the convergence significantly
#
# - For the MaxPool layer:
#    - MAXPOOL | pool height | pool width | stride height | stride width
#
# The RESHAPE layer is needed to flatten the output before the Dense layer
#
# Note that CPU or GPU support is required to run the CNN


if useTMVACNN:
    # Training strategy, a comma separated list of settings.
    cnnTrainingSettings = [
        "LearningRate=1e-3",
        "Momentum=0.9",
        "Repetitions=1",
        "ConvergenceSteps=5",
        "BatchSize=100",
        "TestRepetitions=1",
        "WeightDecay=1e-4",
        "Regularization=None",
        "Optimizer=ADAM",
        "DropConfig=0.0+0.0+0.0+0.0",
        "MaxEpochs=" + str(max_epochs),
    ]
    trainingString1 = ROOT.TString(",".join(cnnTrainingSettings))

    ## New DL (CNN): GPU if available, CPU otherwise
    if hasGPU:
        cnnOptions = "GPU"
        cnnMethodName = "TMVA_CNN_GPU"
    else:
        cnnOptions = "CPU"
        cnnMethodName = "TMVA_CNN_CPU"

    cnnBookingOptions = dict(
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform=None,
        WeightInitialization="XAVIER",
        InputLayout="1|16|16",
        Layout="CONV|10|3|3|1|1|1|1|RELU,BNORM,CONV|10|3|3|1|1|1|1|RELU,MAXPOOL|2|2|1|1,RESHAPE|FLAT,DENSE|100|RELU,DENSE|1|LINEAR",
        TrainingStrategy=trainingString1,
        Architecture=cnnOptions,
    )
    factory.BookMethod(loader, TMVA.Types.kDL, cnnMethodName, **cnnBookingOptions)
421
422
### Book Convolutional Neural Network in PyTorch using a model-definition script


if usePyTorchCNN:
    ROOT.Info("TMVA_CNN_Classification", "Using Convolutional PyTorch Model")
    # script defining the model, looked up in the ROOT tutorial directory
    pyTorchFileName = str(ROOT.gROOT.GetTutorialDir()) + "/tmva/PyTorch_Generate_CNN_Model.py"
    # book only if torch can be imported and the model-definition file exists
    torch_spec = importlib.util.find_spec("torch")
    if torch_spec is None or not os.path.exists(pyTorchFileName):
        ROOT.Warning(
            "TMVA_CNN_Classification",
            "PyTorch is not installed or model building file is not existing - skip using PyTorch",
        )
    else:
        ROOT.Info("TMVA_CNN_Classification", "Booking PyTorch CNN model")
        # the model-definition script is handed to the method through UserCode
        pyTorchBookingOptions = dict(
            H=True,
            V=False,
            VarTransform=None,
            FilenameModel="PyTorchModelCNN.pt",
            FilenameTrainedModel="PyTorchTrainedModelCNN.pt",
            NumEpochs=max_epochs,
            BatchSize=100,
            UserCode=pyTorchFileName,
        )
        factory.BookMethod(loader, TMVA.Types.kPyTorch, "PyTorch", **pyTorchBookingOptions)
455
if useKerasCNN:
    ROOT.Info("TMVA_CNN_Classification", "Building convolutional keras model")
    # Build the Keras model (2 Conv2D layers + max pooling + dense layers) and
    # save it to a file that the PyKeras method reads back.
    import tensorflow
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Reshape

    model = Sequential(
        [
            # the flat 256-value input is reshaped into a 16x16 single-channel image
            Reshape((16, 16, 1), input_shape=(256,)),
            Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same"),
            Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same"),
            # stride for maxpool is equal to pool size
            MaxPooling2D(pool_size=(2, 2)),
            Flatten(),
            Dense(64, activation="tanh"),
            Dense(2, activation="sigmoid"),
        ]
    )
    model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"])
    model.save("model_cnn.h5")
    model.summary()

    # book the PyKeras method only if the Keras model file could be created
    if os.path.exists("model_cnn.h5"):
        ROOT.Info("TMVA_CNN_Classification", "Booking convolutional keras model")
        kerasBookingOptions = dict(
            H=True,
            V=False,
            VarTransform=None,
            FilenameModel="model_cnn.h5",
            FilenameTrainedModel="trained_model_cnn.h5",
            NumEpochs=max_epochs,
            BatchSize=100,
            # needed for RTX NVidia card and to avoid TF allocates all GPU memory
            GpuOptions="allow_growth=True",
        )
        factory.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras", **kerasBookingOptions)
    else:
        raise FileNotFoundError("Error creating Keras model file - skip using Keras")
501
502
503
## Train Methods

factory.TrainAllMethods()

## Test and Evaluate Methods

factory.TestAllMethods()

factory.EvaluateAllMethods()

## Plot ROC Curve

c1 = factory.GetROCCurve(loader)
c1.Draw()

# close the output file to save its content; there is no file to close when
# writeOutputFile is False (outputFile is then None)
if outputFile:
    outputFile.Close()
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4082
This is the main MVA steering class.
Definition Factory.h:80
static void PyInitialize()
Initialize Python interpreter.
static Tools & Instance()
Definition Tools.cxx:71
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:537
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.
Definition TROOT.cxx:575