Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RegressionPyTorch.py File Reference

Detailed Description

View in nbviewer Open in SWAN
This tutorial shows how to do regression in TMVA with neural networks trained with PyTorch.

# PyTorch has to be imported before ROOT to avoid crashes because of clashing
# std::regexp symbols that are exported by cppyy.
# See also: https://github.com/wlav/cppyy/issues/227
import torch
from torch import nn
from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os.path import isfile
# Setup TMVA
# Initialise the TMVA toolkit and the Python bridge before any Python-based
# method (such as the PyTorch method booked further down) is used.
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()

# create factory without output file since it is not needed
factory = TMVA.Factory(
    'TMVARegression',
    '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Regression')

# Load data
# Download the example file only when it is not already present locally.
if not isfile('tmva_reg_example.root'):
    call(['curl', '-L', '-O', 'http://root.cern.ch/files/tmva_reg_example.root'])

data = TFile.Open('tmva_reg_example.root')
# TFile.Open yields a null/zombie file on failure; stop early with a clear
# message instead of failing later on data.Get().
if not data or data.IsZombie():
    raise RuntimeError("Could not open 'tmva_reg_example.root'")
tree = data.Get('TreeR')

dataloader = TMVA.DataLoader('dataset')
# Every branch except the regression target 'fvalue' is an input variable.
for branch in tree.GetListOfBranches():
    name = branch.GetName()
    if name != 'fvalue':
        dataloader.AddVariable(name)
dataloader.AddTarget('fvalue')

dataloader.AddRegressionTree(tree, 1.0)
dataloader.PrepareTrainingAndTestTree(
    TCut(''),
    'nTrain_Regression=4000:SplitMode=Random:NormMode=NumEvents:!V')

# Generate model
# Define model
# Two inputs -> 64 hidden units -> one regression output. The activation
# between the linear layers is required: without it the stack collapses into
# a single linear map.
model = nn.Sequential()
model.add_module('linear_1', nn.Linear(in_features=2, out_features=64))
model.add_module('tanh', nn.Tanh())
model.add_module('linear_2', nn.Linear(in_features=64, out_features=1))

# Construct loss function and Optimizer.
# `loss` is a criterion instance; `optimizer` is deliberately left as the
# class, because train() instantiates it with the model parameters.
loss = torch.nn.MSELoss()
optimizer = torch.optim.SGD
# Define train function
def train(model, train_loader, val_loader, num_epochs, batch_size, optimizer, criterion, save_best, scheduler):
    """Train `model` for `num_epochs` epochs and return it.

    Args:
        model: module whose parameters are optimised in place.
        train_loader: iterable of ``(X, y)`` mini-batches used for training.
        val_loader: sized iterable of ``(X, y)`` mini-batches used for
            validation; must support ``len()`` and be non-empty.
        num_epochs: number of passes over `train_loader`.
        batch_size: accepted for interface compatibility; not used here
            because the loaders already yield batches.
        optimizer: optimizer *factory*, called as
            ``optimizer(model.parameters(), lr=0.01)``.
        criterion: callable ``criterion(output, y)`` returning a scalar loss.
        save_best: optional callable ``save_best(model, curr_val, best_val)``
            returning the new best validation loss; skipped when falsy.
        scheduler: pair ``(schedule, schedulerSteps)``; when `schedule` is
            truthy it is called once per epoch as
            ``schedule(trainer, epoch, schedulerSteps)``.

    Returns:
        The same `model` object, left in eval mode after the last epoch.
    """
    trainer = optimizer(model.parameters(), lr=0.01)
    schedule, schedulerSteps = scheduler
    best_val = None

    for epoch in range(num_epochs):
        # Training Loop
        # Set to train mode
        model.train()
        running_train_loss = 0.0
        for i, (X, y) in enumerate(train_loader):
            # Gradients accumulate by default, so clear them for each batch.
            trainer.zero_grad()
            output = model(X)
            train_loss = criterion(output, y)
            train_loss.backward()
            trainer.step()

            # print train statistics
            running_train_loss += train_loss.item()
            if i % 32 == 31:  # print every 32 mini-batches
                print("[{}, {}] train loss: {:.3f}".format(epoch+1, i+1, running_train_loss / 32))
                running_train_loss = 0.0

        if schedule:
            # Pass the optimizer *instance*: a schedule adjusts the learning
            # rate through its param_groups, which the factory does not have.
            schedule(trainer, epoch, schedulerSteps)

        # Validation Loop
        # Set to eval mode
        model.eval()
        running_val_loss = 0.0
        with torch.no_grad():
            for X, y in val_loader:
                output = model(X)
                val_loss = criterion(output, y)
                running_val_loss += val_loss.item()

        curr_val = running_val_loss / len(val_loader)
        if save_best:
            if best_val is None:
                best_val = curr_val
            best_val = save_best(model, curr_val, best_val)

        # print val statistics per epoch
        print("[{}] val loss: {:.3f}".format(epoch+1, curr_val))

    # Report num_epochs rather than the loop variable so that a zero-epoch
    # call does not fail on an unbound name.
    print("Finished Training on {} Epochs!".format(num_epochs))
    return model
# Define predict function
def predict(model, test_X, batch_size=32):
    """Run `model` over `test_X` in batches and return the stacked outputs.

    Args:
        model: module to evaluate; it is switched to eval mode.
        test_X: array-like of input rows accepted by ``torch.Tensor(...)``.
        batch_size: number of rows per forward pass.

    Returns:
        A NumPy array with the per-batch model outputs concatenated along the
        first axis, in the same order as `test_X`.
    """
    # Set to eval mode
    model.eval()

    test_dataset = torch.utils.data.TensorDataset(torch.Tensor(test_X))
    # shuffle=False keeps predictions aligned with the input rows.
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    predictions = []
    with torch.no_grad():
        for data in test_loader:
            # TensorDataset yields one-element tuples; unpack the inputs.
            X = data[0]
            outputs = model(X)
            predictions.append(outputs)
        preds = torch.cat(predictions)

    return preds.numpy()
# Bundle the optimizer factory, loss criterion and the train/predict functions
# defined in this script under one name.
# NOTE(review): presumably TMVA's PyTorch method looks these up by this exact
# variable name and these keys - confirm against the MethodPyTorch docs.
load_model_custom_objects = {"optimizer": optimizer, "criterion": loss, "train_func": train, "predict_func": predict}

# Store model to file
# Convert the model to torchscript before saving
m = torch.jit.script(model)
torch.jit.save(m, "modelRegression.pt")
print(m)

# Book methods
# The PyTorch method reads the untrained model from FilenameModel and writes
# the trained one to FilenameTrainedModel; a gradient-boosted BDT is booked
# alongside it on the same dataloader.
factory.BookMethod(
    dataloader, TMVA.Types.kPyTorch, 'PyTorch',
    'H:!V:VarTransform=D,G:FilenameModel=modelRegression.pt:FilenameTrainedModel=trainedModelRegression.pt:NumEpochs=20:BatchSize=32')
factory.BookMethod(
    dataloader, TMVA.Types.kBDT, 'BDTG',
    '!H:!V:VarTransform=D,G:NTrees=1000:BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=4')

# Run TMVA
# Train, test and evaluate every booked method; without these calls the
# script only configures the factory and produces no results.
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
TRangeDynCast (ROOT::Detail::TRangeCast< T, true >): an adapter class that allows typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
TCut: a specialized string object used for TTree selections.
Definition: TCut.h:25
TMVA::Factory: the main MVA steering class.
Definition: Factory.h:80
Date
2020
Author
Anirudh Dagar aniru.nosp@m.dhda.nosp@m.gar6@.nosp@m.gmai.nosp@m.l.com - IIT, Roorkee

Definition in file RegressionPyTorch.py.