{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c0f39155",
   "metadata": {},
   "source": [
    "# TMVA_SOFIE_GNN_Application\n",
    "Macro evaluating a GNN model which was generated with the Parser macro.\n",
    "\n",
    "The notebook reads the input graphs from `graph_data.root`, runs the generated encoder, core, decoder and output-transform sessions on every event, fills histograms with the mean of the resulting node, edge and global data, and finally draws the reference histograms stored in the same file for comparison.\n",
    "\n",
    "**Author:** \n",
    "This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Tuesday, May 19, 2026 at 08:23 PM."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8594f46c",
   "metadata": {},
   "source": [
    "## Includes\n",
    "\n",
    "The current directory (`$PWD`) needs to be added to the include path in order to find the generated model files (`encoder.hxx`, `core.hxx`, `decoder.hxx`, `output_transform.hxx`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b502967b",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#ifdef __CLING__\n",
    "R__ADD_INCLUDE_PATH($PWD)\n",
    "#endif\n",
    "\n",
    "#include \"encoder.hxx\"\n",
    "#include \"core.hxx\"\n",
    "#include \"decoder.hxx\"\n",
    "#include \"output_transform.hxx\"\n",
    "\n",
    "#include \"TMVA/SOFIE_common.hxx\"\n",
    "#include \"TRandom3.h\"\n",
    "#include \"TH1.h\"\n",
    "#include \"TCanvas.h\"\n",
    "#include \"TFile.h\"\n",
    "#include \"TTree.h\"\n",
    "#include \"TSystem.h\"\n",
    "#include \"TMath.h\"\n",
    "#include \"TStopwatch.h\"\n",
    "#include \"ROOT/RDataFrame.hxx\"\n",
    "\n",
    "#include <algorithm>\n",
    "#include <cstdio>\n",
    "#include <iostream>\n",
    "#include <stdexcept>\n",
    "#include <string>\n",
    "#include <vector>\n",
    "\n",
    "using namespace TMVA::Experimental;\n",
    "using namespace TMVA::Experimental::SOFIE;"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a1c3e5f7",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Upper bounds used when generating random graphs, and the feature sizes used to shape the node, edge and global tensors."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2d4f6a8",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "const int num_max_nodes = 10;\n",
    "const int num_max_edges = 30;\n",
    "const int NODE_FEATURE_SIZE = 4;\n",
    "const int EDGE_FEATURE_SIZE = 4;\n",
    "const int GLOBAL_FEATURE_SIZE = 1;"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3f6b2c1d",
   "metadata": {},
   "source": [
    "## Helper functions\n",
    "\n",
    "Definition of a helper function: `check_mem` prints the resident and virtual memory of the process and returns the resident memory in MB."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9184019",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "// Print the label s followed by the resident and virtual memory reported by\n",
    "// gSystem->GetProcInfo, and return the resident memory (value divided by 1024).\n",
    "double check_mem(std::string s = \"\"){\n",
    "   ProcInfo_t p;\n",
    "   printf(\"%s - \",s.c_str());\n",
    "   gSystem->GetProcInfo(&p);\n",
    "   // both values use the same %8.3f format (the second one was mistyped as %8.f3)\n",
    "   printf(\" Rmem = %8.3f MB, Vmem = %8.3f MB \\n\",\n",
    "          p.fMemResident /1024., /// convert memory from kB to MB\n",
    "          p.fMemVirtual /1024.\n",
    "   );\n",
    "   return p.fMemResident / 1024.;\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c63ae08e",
   "metadata": {},
   "source": [
    "Definition of a helper function: `PrintTensor` prints the shape and size of a tensor followed by its values, one row per line (long rows are abbreviated)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "075a6f89",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "// Print shape, size and content of the tensor t.\n",
    "// The first dimension is used as the number of rows and the first stride as the\n",
    "// number of values per row; a rank-1 tensor is printed as a single row.\n",
    "// For every row the first 10 values and the last one are printed.\n",
    "template<class T>\n",
    "void PrintTensor(RTensor<T> & t) {\n",
    "   std::cout << \" shape : \" << ConvertShapeToString(t.GetShape()) << \" size : \" << t.GetSize() << \"\\n\";\n",
    "   auto & shape = t.GetShape();\n",
    "   if (shape.empty()) {\n",
    "      // nothing to index: avoid reading shape[0] of an empty shape\n",
    "      std::cout << std::endl;\n",
    "      return;\n",
    "   }\n",
    "   auto p = t.GetData();\n",
    "   size_t nrows = (shape.size() > 1) ? shape[0] : 1;\n",
    "   size_t ncols = (shape.size() > 1) ? t.GetStrides()[0] : shape[0];\n",
    "   for (size_t i = 0; i < nrows; i++) {\n",
    "      for (size_t j = 0; j < ncols; j++) {\n",
    "         if (j==ncols-1) {\n",
    "            // last value of the row: mark skipped values with an ellipsis\n",
    "            if (j>10) std::cout << \"... \";\n",
    "            std::cout << *p << std::endl;\n",
    "         }\n",
    "         else if (j<10)\n",
    "            std::cout << *p << \", \";\n",
    "         p++;\n",
    "      }\n",
    "   }\n",
    "   std::cout << std::endl;\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25716a32",
   "metadata": {},
   "source": [
    "Definition of a helper function: `Print` prints the four tensors of a `GNN_Data` object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a618c921",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "// Print node, edge, global data and edge index of d, preceded by the optional title txt.\n",
    "void Print(GNN_Data & d, std::string txt = \"\") {\n",
    "   if (!txt.empty()) std::cout << std::endl << txt << std::endl;\n",
    "   std::cout << \"node data:\"; PrintTensor(d.node_data);\n",
    "   std::cout << \"edge data:\"; PrintTensor(d.edge_data);\n",
    "   std::cout << \"global data:\"; PrintTensor(d.global_data);\n",
    "   std::cout << \"edge index:\"; PrintTensor(d.edge_index);\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5a7c9e1b",
   "metadata": {},
   "source": [
    "## GNN model\n",
    "\n",
    "`SOFIE_GNN` holds the four generated sessions (encoder, core, decoder, output transform). It is declared after `Print`, since `Infer` calls it when `verbose` is set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b8d0f2c",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "struct SOFIE_GNN {\n",
    "   bool verbose = false;\n",
    "   TMVA_SOFIE_encoder::Session encoder;\n",
    "   TMVA_SOFIE_core::Session core;\n",
    "   TMVA_SOFIE_decoder::Session decoder;\n",
    "   TMVA_SOFIE_output_transform::Session output_transform;\n",
    "\n",
    "   // Run the model on data for nsteps processing steps.\n",
    "   // Returns one GNN_Data per step (the result after decoder and output transform).\n",
    "   std::vector<GNN_Data> Infer(const GNN_Data & data, int nsteps) {\n",
    "      // infer function\n",
    "      auto input_data = Copy(data);\n",
    "      if (verbose) Print(input_data,\"input_data\");\n",
    "      encoder.infer(input_data);\n",
    "      // latent0 is result of encoder. Need to copy because this stays the same\n",
    "      auto latent0 = Copy(input_data);\n",
    "      GNN_Data latent = input_data; // this can be a view\n",
    "      std::vector<GNN_Data> outputData;\n",
    "      for (int i = 0; i < nsteps; i++) {\n",
    "         if (verbose) Print(latent0,\"input decoded data\");\n",
    "         if (verbose) Print(latent,\"latent data\");\n",
    "         auto core_input = Concatenate(latent0, latent,1);\n",
    "         if (verbose) Print(core_input, \"after concatenate\");\n",
    "         core.infer(core_input);\n",
    "         if (verbose) Print(core_input, \"after core inference\");\n",
    "         // here I need to copy: core_input is passed to decoder.infer and output_transform.infer below,\n",
    "         // while latent is reused as input of the next step\n",
    "         latent = Copy(core_input);\n",
    "         decoder.infer(core_input);\n",
    "         output_transform.infer(core_input);\n",
    "         outputData.push_back(Copy(core_input));\n",
    "      }\n",
    "      return outputData;\n",
    "   }\n",
    "\n",
    "   SOFIE_GNN(bool v = false) : verbose(v) {}\n",
    "\n",
    "};"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8df91160",
   "metadata": {},
   "source": [
    "## Input data\n",
    "\n",
    "Definition of a helper function: `GenerateData` creates `nevts` random graphs using the given random seed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08ca1e1c",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "// Generate nevts graphs with a random number of nodes and edges,\n",
    "// random feature values in [-5,5) and random edge links.\n",
    "std::vector<GNN_Data> GenerateData(int nevts, int seed) {\n",
    "   TRandom3 r(seed);\n",
    "   std::vector<GNN_Data> dataSet;\n",
    "   dataSet.reserve(nevts);\n",
    "   for (int i = 0; i < nevts; i++) {\n",
    "      // generate first number of nodes and edges\n",
    "      // size_t num_nodes = num_max_nodes;//r.Integer(num_max_nodes-2) + 2;\n",
    "      // size_t num_edges = num_max_edges; //r.Integer(num_max_edges-1) + 1;\n",
    "      size_t num_nodes = r.Integer(num_max_nodes-2) + 2;\n",
    "      size_t num_edges = r.Integer(num_max_edges-1) + 1;\n",
    "      GNN_Data gd;\n",
    "      gd.node_data = RTensor<float>({num_nodes, NODE_FEATURE_SIZE});\n",
    "      gd.edge_data = RTensor<float>({num_edges, EDGE_FEATURE_SIZE});\n",
    "      gd.global_data = RTensor<float>({1, GLOBAL_FEATURE_SIZE});\n",
    "      gd.edge_index = RTensor<int>({2, num_edges});\n",
    "\n",
    "      auto genValue = [&]() { return r.Rndm()*10 -5; };\n",
    "      auto genLink = [&] () { return r.Integer(num_nodes);};\n",
    "      std::generate(gd.node_data.begin(), gd.node_data.end(), genValue);\n",
    "      std::generate(gd.edge_data.begin(), gd.edge_data.end(), genValue);\n",
    "      std::generate(gd.global_data.begin(), gd.global_data.end(), genValue);\n",
    "      std::generate(gd.edge_index.begin(), gd.edge_index.end(), genLink);\n",
    "      dataSet.emplace_back(gd);\n",
    "   }\n",
    "   return dataSet;\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fb4fb279",
   "metadata": {},
   "source": [
    "Definition of a helper function: `ReadData` reads the graphs from the columns `node_data`, `edge_data`, `global_data`, `receivers` and `senders` of a tree."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c0dae6d",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%%cpp -d\n",
    "// Read all the events of the tree treename in the file filename and return one GNN_Data per event.\n",
    "// Throws std::runtime_error if the receivers or senders of an event do not have one entry per edge.\n",
    "std::vector<GNN_Data> ReadData(std::string treename, std::string filename) {\n",
    "   bool verbose = false;\n",
    "   ROOT::RDataFrame df(treename,filename);\n",
    "   auto ndata = df.Take<ROOT::RVec<float>>(\"node_data\");\n",
    "   auto edata = df.Take<ROOT::RVec<float>>(\"edge_data\");\n",
    "   auto gdata = df.Take<ROOT::RVec<float>>(\"global_data\");\n",
    "   auto rdata = df.Take<ROOT::RVec<int>>(\"receivers\");\n",
    "   auto sdata = df.Take<ROOT::RVec<int>>(\"senders\");\n",
    "   int nevts = ndata.GetPtr()->size();\n",
    "   std::vector<GNN_Data> dataSet;\n",
    "   dataSet.reserve(nevts);\n",
    "   for (int i = 0; i < nevts; i++) {\n",
    "      GNN_Data gd;\n",
    "      auto & n = (*(ndata.GetPtr()))[i];\n",
    "      size_t num_nodes = n.size()/NODE_FEATURE_SIZE;\n",
    "      auto & e = (*(edata.GetPtr()))[i];\n",
    "      size_t num_edges = e.size()/EDGE_FEATURE_SIZE;\n",
    "      auto & g = (*(gdata.GetPtr()))[i];\n",
    "      // these tensors are built on top of the buffers of the columns read above\n",
    "      gd.node_data = RTensor<float>(n.data(), {num_nodes, NODE_FEATURE_SIZE});\n",
    "      gd.edge_data = RTensor<float>(e.data(), {num_edges, EDGE_FEATURE_SIZE});\n",
    "      gd.global_data = RTensor<float>(g.data(), {1, GLOBAL_FEATURE_SIZE});\n",
    "      gd.edge_index = RTensor<int>({2, num_edges});\n",
    "      auto & r = (*(rdata.GetPtr()))[i];\n",
    "      auto & s = (*(sdata.GetPtr()))[i];\n",
    "      // edge_index has room for exactly num_edges receivers followed by num_edges senders\n",
    "      if (r.size() != num_edges || s.size() != num_edges)\n",
    "         throw std::runtime_error(\"ReadData: receivers/senders size does not match the number of edges in event \" + std::to_string(i));\n",
    "      // need to copy receivers/senders in edge_index tensor\n",
    "      std::copy(r.begin(), r.end(), gd.edge_index.GetData());\n",
    "      std::copy(s.begin(), s.end(), gd.edge_index.GetData()+num_edges);\n",
    "\n",
    "      dataSet.emplace_back(Copy(gd)); // need to copy data in vector to own\n",
    "      if (i < 1 && verbose) Print(dataSet[i],\"Input for Event\" + std::to_string(i));\n",
    "   }\n",
    "   return dataSet;\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "404bac3b",
   "metadata": {},
   "source": [
    "## Run the model\n",
    "\n",
    "Arguments are defined. When `verbose` is true, the output of the first event is printed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58bfa904",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "bool verbose = false;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf3b9529",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "check_mem(\"Initial memory\");\n",
    "SOFIE_GNN gnn;\n",
    "check_mem(\"After creating GNN\");\n",
    "\n",
    "\n",
    "const int seed = 111;\n",
    "const int nproc_steps = 5;"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0a2d46e",
   "metadata": {},
   "source": [
    "Generate or read the input data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2dc3f008",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "int nevts;"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5efdb402",
   "metadata": {},
   "source": [
    "Alternative input, not executed in this notebook: generate random graphs instead of reading them from the file.\n",
    "\n",
    "```cpp\n",
    "std::cout << \"generating data\\n\";\n",
    "nevts = 100;\n",
    "auto inputData = GenerateData(nevts, seed);\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0211bf5",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "std::cout << \"reading data\\n\";\n",
    "auto inputData = ReadData(\"gdata\",\"graph_data.root\");\n",
    "nevts = inputData.size();"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "06530bc6",
   "metadata": {},
   "source": [
    "Optional padding step, not executed in this notebook (`PadData` is not defined here):\n",
    "\n",
    "```cpp\n",
    "std::cout << \"padding data\\n\";\n",
    "PadData(inputData) ;\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb746907",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "// histograms of the mean node, edge and global values of each event\n",
    "auto h1 = new TH1D(\"h1\",\"SOFIE Node data\",40,1,0);\n",
    "auto h2 = new TH1D(\"h2\",\"SOFIE Edge data\",40,1,0);\n",
    "auto h3 = new TH1D(\"h3\",\"SOFIE Global data\",40,1,0);\n",
    "std::cout << \"doing inference...\\n\";\n",
    "\n",
    "\n",
    "check_mem(\"Before evaluating\");\n",
    "TStopwatch w; w.Start();\n",
    "for (int i = 0; i < nevts; i++) {\n",
    "   auto result = gnn.Infer(inputData[i], nproc_steps);\n",
    "   // compute resulting mean and plot them\n",
    "   // (only the result of the last processing step is used)\n",
    "   auto & lr = result.back();\n",
    "   if (i < 1 && verbose) Print(lr,\"Output for Event\" + std::to_string(i));\n",
    "   h1->Fill(TMath::Mean(lr.node_data.begin(), lr.node_data.end()));\n",
    "   h2->Fill(TMath::Mean(lr.edge_data.begin(), lr.edge_data.end()));\n",
    "   h3->Fill(TMath::Mean(lr.global_data.begin(), lr.global_data.end()));\n",
    "}\n",
    "w.Stop();\n",
    "w.Print();\n",
    "check_mem(\"End evaluation\");\n",
    "auto c1 = new TCanvas(\"c1\",\"SOFIE Results\");\n",
    "c1->Divide(1,3);\n",
    "c1->cd(1); h1->Draw();\n",
    "c1->cd(2); h2->Draw();\n",
    "c1->cd(3); h3->Draw();"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "223d822b",
   "metadata": {},
   "source": [
    "## Compare with the reference\n",
    "\n",
    "Draw the objects `h1`, `h2` and `h3` stored in `graph_data.root`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8674258",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "auto c2 = new TCanvas(\"c2\",\"Reference Results\");\n",
    "auto file = TFile::Open(\"graph_data.root\");\n",
    "if (!file || file->IsZombie()) {\n",
    "   // nothing to compare with: report it instead of dereferencing a null pointer\n",
    "   std::cerr << \"Cannot open graph_data.root: reference results are not drawn\\n\";\n",
    "} else {\n",
    "   c2->Divide(1,3);\n",
    "   const char * refNames[] = {\"h1\", \"h2\", \"h3\"};\n",
    "   for (int ipad = 0; ipad < 3; ipad++) {\n",
    "      auto ref = file->Get(refNames[ipad]);\n",
    "      c2->cd(ipad + 1);\n",
    "      if (ref) ref->Draw();\n",
    "      else std::cerr << \"Reference object \" << refNames[ipad] << \" not found in graph_data.root\\n\";\n",
    "   }\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ROOT C++",
   "language": "c++",
   "name": "root"
  },
  "language_info": {
   "codemirror_mode": "text/x-c++src",
   "file_extension": ".C",
   "mimetype": "text/x-c++src",
   "name": "c++"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}