{
"cells": [
{
"cell_type": "markdown",
"id": "c0f39155",
"metadata": {},
"source": [
"# TMVA_SOFIE_GNN_Application\n",
"Macro evaluating a GNN model which was generated with the Parser macro\n",
"\n",
"\n",
"\n",
"\n",
"**Author:** \n",
"This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Tuesday, May 19, 2026 at 08:23 PM."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b502967b",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#ifdef __CLING__\n",
"R__ADD_INCLUDE_PATH($PWD)\n",
"#endif\n",
"\n",
"#include \"encoder.hxx\"\n",
"#include \"core.hxx\"\n",
"#include \"decoder.hxx\"\n",
"#include \"output_transform.hxx\"\n",
"\n",
"#include \"TMVA/SOFIE_common.hxx\"\n",
"#include \"TRandom3.h\"\n",
"#include \"TH1.h\"\n",
"#include \"TCanvas.h\"\n",
"#include \"TFile.h\"\n",
"#include \"TTree.h\"\n",
"#include \"TSystem.h\"\n",
"#include \"ROOT/RDataFrame.hxx\"\n",
"\n",
"using namespace TMVA::Experimental;\n",
"using namespace TMVA::Experimental::SOFIE;\n",
"\n",
"%%cpp -d\n",
"// Chains the four SOFIE-generated sessions (encoder, core, decoder and\n",
"// output_transform) into a single GNN evaluation.\n",
"struct SOFIE_GNN {\n",
"   bool verbose = false;   // when true the intermediate graph data are printed\n",
"   TMVA_SOFIE_encoder::Session encoder;\n",
"   TMVA_SOFIE_core::Session core;\n",
"   TMVA_SOFIE_decoder::Session decoder;\n",
"   TMVA_SOFIE_output_transform::Session output_transform;\n",
"\n",
"   // Evaluates the GNN on one graph.\n",
"   //   data   : input graph; the sessions work on a Copy of it\n",
"   //   nsteps : number of core iterations to run\n",
"   // Returns one GNN_Data per iteration (after decoder and output_transform),\n",
"   // in iteration order; the vector is empty when nsteps <= 0.\n",
"   std::vector<GNN_Data> Infer(const GNN_Data & data, int nsteps) {\n",
"      // infer function\n",
"      auto input_data = Copy(data);\n",
"      if (verbose) Print(input_data,\"input_data\");\n",
"      encoder.infer(input_data);\n",
"      // latent0 is result of encoder. Need to copy because this stays the same\n",
"      auto latent0 = Copy(input_data);\n",
"      GNN_Data latent = input_data; // this can be a view\n",
"      std::vector<GNN_Data> outputData;\n",
"      if (nsteps > 0) outputData.reserve(nsteps);\n",
"      for (int i = 0; i < nsteps; i++) {\n",
"         if (verbose) Print(latent0,\"input decoded data\");\n",
"         if (verbose) Print(latent,\"latent data\");\n",
"         // the core input is the encoder output joined with the current latent state\n",
"         auto core_input = Concatenate(latent0, latent,1);\n",
"         if (verbose) Print(core_input, \"after concatenate\");\n",
"         core.infer(core_input);\n",
"         if (verbose) Print(core_input, \"after core inference\");\n",
"         // here I need to copy: core_input is passed on to decoder and\n",
"         // output_transform below, the latent state must keep the core result\n",
"         latent = Copy(core_input);\n",
"         decoder.infer(core_input);\n",
"         output_transform.infer(core_input);\n",
"         outputData.push_back(Copy(core_input));\n",
"      }\n",
"      return outputData;\n",
"   }\n",
"\n",
"   // v : initial value of the verbose flag\n",
"   SOFIE_GNN(bool v = false) : verbose(v) {}\n",
"\n",
"};\n",
"\n",
"// Limits used by GenerateData when drawing the random number of nodes and\n",
"// edges of a graph.\n",
"const int num_max_nodes = 10;\n",
"const int num_max_edges = 30;\n",
"// Second dimension of the node_data, edge_data and global_data tensors built\n",
"// in GenerateData and ReadData (values stored per node, per edge, per graph).\n",
"const int NODE_FEATURE_SIZE = 4;\n",
"const int EDGE_FEATURE_SIZE = 4;\n",
"const int GLOBAL_FEATURE_SIZE = 1;"
]
},
{
"cell_type": "markdown",
"id": "8594f46c",
"metadata": {},
"source": [
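"The include path is extended in the first code cell (`R__ADD_INCLUDE_PATH`) so that the generated model header files can be found.\n",
"\n",
"Definition of a helper function: `check_mem` prints the resident and virtual memory of the current process."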
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9184019",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%cpp -d\n",
"// Prints the label s followed by the resident and virtual memory of the\n",
"// current process, as filled by gSystem->GetProcInfo, converted to MB.\n",
"// Returns the resident memory in MB.\n",
"double check_mem(std::string s = \"\"){\n",
"   ProcInfo_t p;\n",
"   printf(\"%s - \",s.c_str());\n",
"   gSystem->GetProcInfo(&p);\n",
"   // both values are printed with three decimals\n",
"   printf(\" Rmem = %8.3f MB, Vmem = %8.3f MB \\n\",\n",
"          p.fMemResident /1024., /// convert memory from kB to MB\n",
"          p.fMemVirtual /1024.\n",
"          );\n",
"   return p.fMemResident / 1024.;\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "c63ae08e",
"metadata": {},
"source": [
"Definition of a helper function: `PrintTensor` prints the shape, the size and the (truncated) content of a tensor."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "075a6f89",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%cpp -d\n",
"// Prints the shape and size of a tensor followed by its content.\n",
"// One output line is written per entry of the first dimension (a single line\n",
"// for a 1-dimensional tensor). Per line at most the first 10 values and the\n",
"// last value are shown; skipped values are marked with an ellipsis.\n",
"// T is the value type of the tensor.\n",
"template<class T>\n",
"void PrintTensor(RTensor<T> & t) {\n",
"   std::cout << \" shape : \" << ConvertShapeToString(t.GetShape()) << \" size : \" << t.GetSize() << \"\\n\";\n",
"   auto & shape = t.GetShape();\n",
"   // a tensor without dimensions has nothing to iterate over\n",
"   if (shape.empty()) {\n",
"      std::cout << std::endl;\n",
"      return;\n",
"   }\n",
"   auto p = t.GetData();\n",
"   size_t nrows = (shape.size() > 1) ? shape[0] : 1;\n",
"   // NOTE(review): using GetStrides()[0] as the number of values per row\n",
"   // presumes a row-major layout - confirm for tensors created otherwise\n",
"   size_t ncols = (shape.size() > 1) ? t.GetStrides()[0] : shape[0];\n",
"   for (size_t i = 0; i < nrows; i++) {\n",
"      for (size_t j = 0; j < ncols; j++) {\n",
"         if (j==ncols-1) {\n",
"            // last value of the line; the ellipsis marks the skipped ones\n",
"            if (j>10) std::cout << \"... \";\n",
"            std::cout << *p << std::endl;\n",
"         }\n",
"         else if (j<10)\n",
"            std::cout << *p << \", \";\n",
"         p++;\n",
"      }\n",
"   }\n",
"   std::cout << std::endl;\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "25716a32",
"metadata": {},
"source": [
"Definition of a helper function: `Print` prints the node, edge, global and edge-index tensors of a `GNN_Data` object."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a618c921",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%cpp -d\n",
"// Prints the four tensors of a GNN_Data object (node data, edge data, global\n",
"// data and edge index) with PrintTensor. A non-empty txt is written first,\n",
"// on a line of its own, as a caption.\n",
"void Print(GNN_Data & d, std::string txt = \"\") {\n",
"   const bool hasCaption = !txt.empty();\n",
"   if (hasCaption) {\n",
"      std::cout << std::endl << txt << std::endl;\n",
"   }\n",
"\n",
"   std::cout << \"node data:\";\n",
"   PrintTensor(d.node_data);\n",
"\n",
"   std::cout << \"edge data:\";\n",
"   PrintTensor(d.edge_data);\n",
"\n",
"   std::cout << \"global data:\";\n",
"   PrintTensor(d.global_data);\n",
"\n",
"   std::cout << \"edge index:\";\n",
"   PrintTensor(d.edge_index);\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "8df91160",
"metadata": {},
"source": [
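"Definition of a helper function: `GenerateData` creates a set of graphs with random sizes, random feature values and random edge links."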
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08ca1e1c",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%cpp -d\n",
"// Generates nevts random graphs using a TRandom3 generator seeded with seed.\n",
"// For every graph the number of nodes and edges is drawn at random (limited\n",
"// by num_max_nodes and num_max_edges), the node, edge and global features are\n",
"// filled with r.Rndm()*10 - 5 and the edge index with random node numbers\n",
"// below num_nodes.\n",
"// Returns the generated graphs; the vector is empty when nevts <= 0.\n",
"std::vector<GNN_Data> GenerateData(int nevts, int seed) {\n",
"   TRandom3 r(seed);\n",
"   std::vector<GNN_Data> dataSet;\n",
"   // a negative count must not reach reserve(), where it would be converted\n",
"   // to a huge unsigned size\n",
"   if (nevts > 0) dataSet.reserve(nevts);\n",
"   for (int i = 0; i < nevts; i++) {\n",
"      // generate first number of nodes and edges\n",
"      // (for fixed-size graphs use num_max_nodes and num_max_edges directly)\n",
"      size_t num_nodes = r.Integer(num_max_nodes-2) + 2;\n",
"      size_t num_edges = r.Integer(num_max_edges-1) + 1;\n",
"      GNN_Data gd;\n",
"      // feature tensors hold float values, the edge index holds integers\n",
"      gd.node_data = RTensor<float>({num_nodes, NODE_FEATURE_SIZE});\n",
"      gd.edge_data = RTensor<float>({num_edges, EDGE_FEATURE_SIZE});\n",
"      gd.global_data = RTensor<float>({1, GLOBAL_FEATURE_SIZE});\n",
"      gd.edge_index = RTensor<int>({2, num_edges});\n",
"\n",
"      auto genValue = [&]() { return r.Rndm()*10 -5; };\n",
"      auto genLink = [&] () { return r.Integer(num_nodes);};\n",
"      std::generate(gd.node_data.begin(), gd.node_data.end(), genValue);\n",
"      std::generate(gd.edge_data.begin(), gd.edge_data.end(), genValue);\n",
"      std::generate(gd.global_data.begin(), gd.global_data.end(), genValue);\n",
"      std::generate(gd.edge_index.begin(), gd.edge_index.end(), genLink);\n",
"      dataSet.emplace_back(gd);\n",
"   }\n",
"   return dataSet;\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "fb4fb279",
"metadata": {},
"source": [
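"Definition of a helper function: `ReadData` reads the graphs stored in a tree of a ROOT file and returns them as a vector of `GNN_Data`."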
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c0dae6d",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%cpp -d\n",
"// Reads the graphs stored in the tree treename of the ROOT file filename.\n",
"// The columns node_data, edge_data, global_data, receivers and senders are\n",
"// taken with RDataFrame and every entry is converted into a GNN_Data object\n",
"// that is copied into the returned vector.\n",
"// NOTE(review): the column element types (float for the three feature\n",
"// columns, int for receivers/senders) follow the value types of the GNN_Data\n",
"// tensors they are copied into - confirm they match the branches in the file.\n",
"std::vector<GNN_Data> ReadData(std::string treename, std::string filename) {\n",
"   bool verbose = false;\n",
"   ROOT::RDataFrame df(treename,filename);\n",
"   auto ndata = df.Take<ROOT::RVec<float>>(\"node_data\");\n",
"   auto edata = df.Take<ROOT::RVec<float>>(\"edge_data\");\n",
"   auto gdata = df.Take<ROOT::RVec<float>>(\"global_data\");\n",
"   auto rdata = df.Take<ROOT::RVec<int>>(\"receivers\");\n",
"   auto sdata = df.Take<ROOT::RVec<int>>(\"senders\");\n",
"   // dereference the results once instead of once per event\n",
"   auto & nodeCol = *(ndata.GetPtr());\n",
"   auto & edgeCol = *(edata.GetPtr());\n",
"   auto & globalCol = *(gdata.GetPtr());\n",
"   auto & receiverCol = *(rdata.GetPtr());\n",
"   auto & senderCol = *(sdata.GetPtr());\n",
"   int nevts = nodeCol.size();\n",
"   std::vector<GNN_Data> dataSet;\n",
"   dataSet.reserve(nevts);\n",
"   for (int i = 0; i < nevts; i++) {\n",
"      GNN_Data gd;\n",
"      auto & n = nodeCol[i];\n",
"      size_t num_nodes = n.size()/NODE_FEATURE_SIZE;\n",
"      auto & e = edgeCol[i];\n",
"      size_t num_edges = e.size()/EDGE_FEATURE_SIZE;\n",
"      auto & g = globalCol[i];\n",
"      // these tensors are built on top of the column buffers; the Copy below\n",
"      // is what puts the data into the returned vector\n",
"      gd.node_data = RTensor<float>(n.data(), {num_nodes, NODE_FEATURE_SIZE});\n",
"      gd.edge_data = RTensor<float>(e.data(), {num_edges, EDGE_FEATURE_SIZE});\n",
"      gd.global_data = RTensor<float>(g.data(), {1, GLOBAL_FEATURE_SIZE});\n",
"      gd.edge_index = RTensor<int>({2, num_edges});\n",
"      auto & r = receiverCol[i];\n",
"      auto & s = senderCol[i];\n",
"      // need to copy receivers/senders in edge_index tensor: receivers start\n",
"      // at offset 0, senders at offset num_edges\n",
"      // (assumes both hold num_edges entries - TODO confirm against the file)\n",
"      std::copy(r.begin(), r.end(), gd.edge_index.GetData());\n",
"      std::copy(s.begin(), s.end(), gd.edge_index.GetData()+num_edges);\n",
"\n",
"      dataSet.emplace_back(Copy(gd)); // need to copy data in vector to own\n",
"      if (i < 1 && verbose) Print(dataSet[i],\"Input for Event\" + std::to_string(i));\n",
"   }\n",
"   return dataSet;\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "404bac3b",
"metadata": {},
"source": [
" Arguments are defined. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58bfa904",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"// When true, the inference loop prints the GNN output of the first event.\n",
"bool verbose = false;"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf3b9529",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"// Report the memory before and after constructing the GNN object, which\n",
"// holds the four SOFIE sessions.\n",
"check_mem(\"Initial memory\");\n",
"SOFIE_GNN gnn;\n",
"check_mem(\"After creating GNN\");\n",
"\n",
"\n",
"// seed is only referenced by the GenerateData call kept in a markdown cell\n",
"const int seed = 111;\n",
"// number of core iterations passed to gnn.Infer for every event\n",
"const int nproc_steps = 5;"
]
},
{
"cell_type": "markdown",
"id": "e0a2d46e",
"metadata": {},
"source": [
"Generate or read the input data; `nevts` holds the number of events to process."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2dc3f008",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"// number of events; assigned once the input data are available\n",
"int nevts;"
]
},
{
"cell_type": "markdown",
"id": "5efdb402",
"metadata": {},
"source": [
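"Alternative input (not executed, kept for reference): generate random graphs instead of reading them from the file.\n",
"\n",
"```cpp\n",
"std::cout << \"generating data\\n\";\n",
"nevts = 100;\n",
"auto inputData = GenerateData(nevts, seed);\n",
"```"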
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0211bf5",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"// Load the graphs from the gdata tree of graph_data.root; nevts becomes the\n",
"// number of graphs that were read.\n",
"std::cout << \"reading data\\n\";\n",
"auto inputData = ReadData(\"gdata\",\"graph_data.root\");\n",
"nevts = inputData.size();"
]
},
{
"cell_type": "markdown",
"id": "06530bc6",
"metadata": {},
"source": [
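"Optional padding step (not executed; `PadData` is not defined in this notebook):\n",
"\n",
"```cpp\n",
"std::cout << \"padding data\\n\";\n",
"PadData(inputData) ;\n",
"```"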
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb746907",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"// Histograms of the per-event mean of the node, edge and global data of the\n",
"// last processing step.\n",
"auto h1 = new TH1D(\"h1\",\"SOFIE Node data\",40,1,0);\n",
"auto h2 = new TH1D(\"h2\",\"SOFIE Edge data\",40,1,0);\n",
"auto h3 = new TH1D(\"h3\",\"SOFIE Global data\",40,1,0);\n",
"std::cout << \"doing inference...\\n\";\n",
"\n",
"// Time the full event loop and report the memory before and after it.\n",
"check_mem(\"Before evaluating\");\n",
"TStopwatch w;\n",
"w.Start();\n",
"for (int ievt = 0; ievt < nevts; ++ievt) {\n",
"   auto stepOutputs = gnn.Infer(inputData[ievt], nproc_steps);\n",
"   // only the output of the last step enters the histograms\n",
"   auto & lastOutput = stepOutputs.back();\n",
"   if (verbose && ievt < 1) {\n",
"      Print(lastOutput,\"Output for Event\" + std::to_string(ievt));\n",
"   }\n",
"   h1->Fill(TMath::Mean(lastOutput.node_data.begin(), lastOutput.node_data.end()));\n",
"   h2->Fill(TMath::Mean(lastOutput.edge_data.begin(), lastOutput.edge_data.end()));\n",
"   h3->Fill(TMath::Mean(lastOutput.global_data.begin(), lastOutput.global_data.end()));\n",
"}\n",
"w.Stop();\n",
"w.Print();\n",
"check_mem(\"End evaluation\");\n",
"\n",
"// Draw the three histograms, one per pad of the divided canvas.\n",
"auto c1 = new TCanvas(\"c1\",\"SOFIE Results\");\n",
"c1->Divide(1,3);\n",
"c1->cd(1);\n",
"h1->Draw();\n",
"c1->cd(2);\n",
"h2->Draw();\n",
"c1->cd(3);\n",
"h3->Draw();"
]
},
{
"cell_type": "markdown",
"id": "223d822b",
"metadata": {},
"source": [
"Compare with the reference histograms `h1`, `h2` and `h3` stored in `graph_data.root`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8674258",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"// Draw the objects named h1, h2 and h3 stored in graph_data.root on a second\n",
"// canvas, one per pad.\n",
"auto c2 = new TCanvas(\"c2\",\"Reference Results\");\n",
"auto file = TFile::Open(\"graph_data.root\");\n",
"// NOTE(review): file is not checked for null before use - presumably the\n",
"// earlier ReadData call on the same file has already succeeded; confirm\n",
"auto o1 = file->Get(\"h1\");\n",
"auto o2 = file->Get(\"h2\");\n",
"auto o3 = file->Get(\"h3\");\n",
"c2->Divide(1,3);\n",
"c2->cd(1); o1->Draw();\n",
"c2->cd(2); o2->Draw();\n",
"c2->cd(3); o3->Draw();"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ROOT C++",
"language": "c++",
"name": "root"
},
"language_info": {
"codemirror_mode": "text/x-c++src",
"file_extension": ".C",
"mimetype": " text/x-c++src",
"name": "c++"
}
},
"nbformat": 4,
"nbformat_minor": 5
}