Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
TMVA_SOFIE_Models.py File Reference

Detailed Description

View in nbviewer Open in SWAN
Example of inference with SOFIE using a set of models trained with Keras.

This tutorial shows how to store several models in a single header file and the weights in a ROOT binary file. The models are then evaluated using the RDataFrame First, generate the input model by running TMVA_Higgs_Classification.C.

This tutorial parses the input model and runs the inference using ROOT's JITing capability.

import ROOT
from os.path import exists
## generate and train Keras models with different architectures
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
def CreateModel(nlayers = 4, nunits = 64):
model = Sequential()
model.add(Dense(nunits, activation='relu',input_dim=7))
for i in range(1,nlayers) :
model.add(Dense(nunits, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.001), weighted_metrics = ['accuracy'])
return model
def PrepareData() :
#get the input data
inputFile = ROOT.gROOT.GetTutorialDir() + "machine_learning/data/Higgs_data.root"
df1 = ROOT.RDataFrame("sig_tree", inputFile)
sigData = df1.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb'])
#print(sigData)
# stack all the 7 numpy array in a single array (nevents x nvars)
data_sig_size = xsig.shape[0]
print("size of data", data_sig_size)
# make SOFIE inference on background data
df2 = ROOT.RDataFrame("bkg_tree", inputFile)
bkgData = df2.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb'])
data_bkg_size = xbkg.shape[0]
ysig = np.ones(data_sig_size)
ybkg = np.zeros(data_bkg_size)
inputs_data = np.concatenate((xsig,xbkg),axis=0)
inputs_targets = np.concatenate((ysig,ybkg),axis=0)
#split data in training and test data
x_train, x_test, y_train, y_test = train_test_split(
inputs_data, inputs_targets, test_size=0.50, random_state=1234)
return x_train, y_train, x_test, y_test
def TrainModel(model, x, y, name) :
model.fit(x,y,epochs=10,batch_size=50)
modelFile = name + '.h5'
model.save(modelFile)
return modelFile
### run the models
x_train, y_train, x_test, y_test = PrepareData()
## create models and train them
model1 = TrainModel(CreateModel(4,64),x_train, y_train, 'Higgs_Model_4L_50')
model2 = TrainModel(CreateModel(4,64),x_train, y_train, 'Higgs_Model_4L_200')
model3 = TrainModel(CreateModel(4,64),x_train, y_train, 'Higgs_Model_2L_500')
#evaluate with SOFIE the 3 trained models
def GenerateModelCode(modelFile, generatedHeaderFile):
print("Generating inference code for the Keras model from ",modelFile,"in the header ", generatedHeaderFile)
#Generating inference code using a ROOT binary file
# add option to append to the same file the generated headers (pass True for append flag)
model.OutputGenerated(generatedHeaderFile, True)
#model.PrintGenerated()
return generatedHeaderFile
generatedHeaderFile = "Higgs_Model.hxx"
#need to remove existing header file since we are appending on same one
import os
if (os.path.exists(generatedHeaderFile)):
weightFile = "Higgs_Model.root"
print("removing existing files", generatedHeaderFile,weightFile)
os.remove(generatedHeaderFile)
os.remove(weightFile)
GenerateModelCode(model1, generatedHeaderFile)
GenerateModelCode(model2, generatedHeaderFile)
GenerateModelCode(model3, generatedHeaderFile)
#compile the generated code
ROOT.gInterpreter.Declare('#include "' + generatedHeaderFile + '"')
#run the inference on the test data
session1 = ROOT.TMVA_SOFIE_Higgs_Model_4L_50.Session("Higgs_Model.root")
session2 = ROOT.TMVA_SOFIE_Higgs_Model_4L_200.Session("Higgs_Model.root")
session3 = ROOT.TMVA_SOFIE_Higgs_Model_2L_500.Session("Higgs_Model.root")
hs1 = ROOT.TH1D("hs1","Signal result 4L 50",100,0,1)
hs2 = ROOT.TH1D("hs2","Signal result 4L 200",100,0,1)
hs3 = ROOT.TH1D("hs3","Signal result 2L 500",100,0,1)
hb1 = ROOT.TH1D("hb1","Background result 4L 50",100,0,1)
hb2 = ROOT.TH1D("hb2","Background result 4L 200",100,0,1)
hb3 = ROOT.TH1D("hb3","Background result 2L 500",100,0,1)
def EvalModel(session, x) :
result = session.infer(x)
return result[0]
for i in range(0,x_test.shape[0]):
result1 = EvalModel(session1, x_test[i,:])
result2 = EvalModel(session2, x_test[i,:])
result3 = EvalModel(session3, x_test[i,:])
if (y_test[i] == 1) :
hs1.Fill(result1)
hs2.Fill(result2)
hs3.Fill(result3)
else:
hb1.Fill(result1)
hb2.Fill(result2)
hb3.Fill(result3)
def PlotHistos(hs,hb):
hb.SetLineColor("kBlue")
hb.Draw("same")
PlotHistos(hs1,hb1)
PlotHistos(hs2,hb2)
PlotHistos(hs3,hb3)
## draw also ROC curves
def GetContent(h) :
x = ROOT.std.vector['float'](n)
w = ROOT.std.vector['float'](n)
for i in range(0,n):
x[i] = h.GetBinCenter(i+1)
w[i] = h.GetBinContent(i+1)
return x,w
def MakeROCCurve(hs, hb) :
xs,ws = GetContent(hs)
xb,wb = GetContent(hb)
roc = ROOT.TMVA.ROCCurve(xs,xb,ws,wb)
print("ROC integral for ",hs.GetName(), roc.GetROCIntegral())
curve = roc.GetROCCurve()
return roc,curve
r1,curve1 = MakeROCCurve(hs1,hb1)
r2,curve2 = MakeROCCurve(hs2,hb2)
r3,curve3 = MakeROCCurve(hs3,hb3)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Author
Lorenzo Moneta

Definition in file TMVA_SOFIE_Models.py.