//Code generated automatically by TMVA for Inference of Model file [gnn_core] at [Tue May 19 20:23:38 2026] #ifndef ROOT_TMVA_SOFIE_GNN_CORE #define ROOT_TMVA_SOFIE_GNN_CORE #include #include #include #include "TMVA/SOFIE_common.hxx" #include namespace TMVA_SOFIE_gnn_core{ namespace BLAS{ extern "C" void saxpy_(const int * n, const float * alpha, const float * x, const int * incx, float * y, const int * incy); extern "C" void sgemv_(const char * trans, const int * m, const int * n, const float * alpha, const float * A, const int * lda, const float * X, const int * incx, const float * beta, const float * Y, const int * incy); extern "C" void sgemm_(const char * transa, const char * transb, const int * m, const int * n, const int * k, const float * alpha, const float * A, const int * lda, const float * B, const int * ldb, const float * beta, float * C, const int * ldc); }//BLAS namespace Edge_Update{ struct Session { //--------- GNN_Update_Function---edge_update // initialized (weights and constant) tensors std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normscale0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normscale0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normscale0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3w0 = std::vector(10000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3b0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normoffset0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normoffset0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normoffset0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2w0 = std::vector(10000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2b0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1w0 = std::vector(10000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0w0 = std::vector(80000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1b0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0b0.data(); // --- Positioning intermediate tensor memory --//--- declare the dynamic tensors float * tensor_edge_updateRelu4output = nullptr; float * tensor_edge_updateRelu4 = nullptr; float * tensor_edge_updateGemm2 = nullptr; float * tensor_edge_updateRelu1 = nullptr; float * tensor_edge_updateGemm1 = nullptr; float * tensor_edge_updateGemm4 = nullptr; float * tensor_edge_updateRelu2 = nullptr; float * tensor_edge_updateRelu0 = nullptr; float * tensor_edge_updateGemm0 = nullptr; float * tensor_edge_updateInputConcat = nullptr; //--- dynamic tensors pool std::vector fDynamicMemoryPool; // dynamic shape parameters size_t fNum_edges; Session(std::string filename ="gnn_core.dat", size_t num_edges = 20) { fNum_edges = num_edges; //--- reading weights from file std::ifstream f; f.open(filename); if (!f.is_open()) { throw std::runtime_error("tmva-sofie failed to open file " + filename + " for input weights"); } f.seekg(0); using TMVA::Experimental::SOFIE::ReadTensorFromStream; ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normscale0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normscale0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3w0", 10000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3b0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normoffset0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normoffset0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2w0", 10000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2b0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1w0", 10000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0w0", 80000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1b0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0b0", 100); f.close(); // dynamic tensor memory management std::vector dynamicTensorInfos; dynamicTensorInfos.reserve(10); dynamicTensorInfos.push_back( {0, 2, 4* (num_edges * 800) }); // tensor_edge_updateInputConcat dynamicTensorInfos.push_back( {1, 3, 4* (num_edges * 100) }); // tensor_edge_updateGemm0 dynamicTensorInfos.push_back( {2, 4, 4* (num_edges * 100) }); // tensor_edge_updateRelu0 dynamicTensorInfos.push_back( {3, 5, 4* (num_edges * 100) }); // tensor_edge_updateGemm1 dynamicTensorInfos.push_back( {4, 6, 4* (num_edges * 100) }); // tensor_edge_updateRelu1 dynamicTensorInfos.push_back( {5, 7, 4* (num_edges * 100) }); // tensor_edge_updateGemm2 dynamicTensorInfos.push_back( {6, 8, 4* (num_edges * 100) }); // tensor_edge_updateRelu2 dynamicTensorInfos.push_back( {7, 9, 4* (num_edges * 100) }); // tensor_edge_updateGemm4 dynamicTensorInfos.push_back( {8, 10, 4* (num_edges * 100) }); // tensor_edge_updateRelu4 auto memory_result = OrganizeMemory(dynamicTensorInfos); // allocating now the memory fDynamicMemoryPool = std::vector(memory_result.total_bytes); int idx = 0; tensor_edge_updateInputConcat = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_edge_updateGemm0 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_edge_updateRelu0 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_edge_updateGemm1 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_edge_updateRelu1 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_edge_updateGemm2 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_edge_updateRelu2 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_edge_updateGemm4 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_edge_updateRelu4 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); } std::vector infer(size_t num_edges,float const* tensor_edge,float const* tensor_receiver,float const* tensor_sender,float const* tensor_global){ std::vector output_tensor_edge_updateRelu4output(num_edges * 100); if (num_edges > fNum_edges) { throw std::runtime_error("TMVA-SOFIE: dynamic input tensor shape parameter num_edges exceeds the initialized maximum allowed shape."); } doInfer(*this, num_edges,tensor_edge,tensor_receiver,tensor_sender,tensor_global, output_tensor_edge_updateRelu4output.data() ); output_tensor_edge_updateRelu4output.resize(num_edges * 100); return {output_tensor_edge_updateRelu4output}; } inline void doInfer(Session const &session, size_t num_edges,float const* tensor_edge,float const* tensor_receiver,float const* tensor_sender,float const* tensor_global, float *tensor_edge_updateRelu4output ) { //--------- Concat op_0 --> edge_updateInputConcat { num_edges , 800 } for (size_t i0 = 0; i0 < num_edges; ++i0) { int idxOut = 800*i0; int idxIn0 = 200*i0; for (size_t iC = 0; iC < 200; ++iC) { tensor_edge_updateInputConcat[idxOut+iC] = tensor_edge[idxIn0+iC]; } idxOut += 200; int idxIn1 = 200*i0; for (size_t iC = 0; iC < 200; ++iC) { tensor_edge_updateInputConcat[idxOut+iC] = tensor_receiver[idxIn1+iC]; } idxOut += 200; int idxIn2 = 200*i0; for (size_t iC = 0; iC < 200; ++iC) { tensor_edge_updateInputConcat[idxOut+iC] = tensor_sender[idxIn2+iC]; } idxOut += 200; int idxIn3 = 200*i0; for (size_t iC = 0; iC < 200; ++iC) { tensor_edge_updateInputConcat[idxOut+iC] = tensor_global[idxIn3+iC]; } } //--------- Gemm op_1 { num_edges , 800 } * { 800 , 100 } -> { num_edges , 100 } for (size_t j = 0; j < num_edges; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_edge_updateGemm0 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_edge_updateGemm0, false, false, 100, num_edges, 800, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_0w0, tensor_edge_updateInputConcat, 1,nullptr); //------ RELU for (int id = 0; id < num_edges * 100 ; id++){ tensor_edge_updateRelu0[id] = ((tensor_edge_updateGemm0[id] > 0 )? tensor_edge_updateGemm0[id] : 0); } //--------- Gemm op_3 { num_edges , 100 } * { 100 , 100 } -> { num_edges , 100 } for (size_t j = 0; j < num_edges; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_edge_updateGemm1 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_edge_updateGemm1, false, false, 100, num_edges, 100, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_1w0, tensor_edge_updateRelu0, 1,nullptr); //------ RELU for (int id = 0; id < num_edges * 100 ; id++){ tensor_edge_updateRelu1[id] = ((tensor_edge_updateGemm1[id] > 0 )? tensor_edge_updateGemm1[id] : 0); } //--------- Gemm op_5 { num_edges , 100 } * { 100 , 100 } -> { num_edges , 100 } for (size_t j = 0; j < num_edges; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_edge_updateGemm2 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_edge_updateGemm2, false, false, 100, num_edges, 100, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_2w0, tensor_edge_updateRelu1, 1,nullptr); //------ RELU for (int id = 0; id < num_edges * 100 ; id++){ tensor_edge_updateRelu2[id] = ((tensor_edge_updateGemm2[id] > 0 )? tensor_edge_updateGemm2[id] : 0); } //--------- Gemm op_7 { num_edges , 100 } * { 100 , 100 } -> { num_edges , 100 } for (size_t j = 0; j < num_edges; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_edge_updateGemm4 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_edge_updateGemm4, false, false, 100, num_edges, 100, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blockmlplinear_3w0, tensor_edge_updateRelu2, 1,nullptr); //------ RELU for (int id = 0; id < num_edges * 100 ; id++){ tensor_edge_updateRelu4[id] = ((tensor_edge_updateGemm4[id] > 0 )? tensor_edge_updateGemm4[id] : 0); } //---- Layer Normalization operator op_9 // Compute the mean for (size_t axis_0 = 0; axis_0 < num_edges; axis_0++) { float mean = 0.; for (size_t axis_1 = 0; axis_1 < 100; axis_1++) { mean += tensor_edge_updateRelu4[axis_0 * 100 + axis_1]; } mean /= float(100); // Compute the inverse Standard Deviation float sum = 0.; for (size_t axis_1 = 0; axis_1 < 100; axis_1++){ float tmp = tensor_edge_updateRelu4[axis_0 * 100 + axis_1] - mean; sum += tmp*tmp; } float invStdDev = 1 / std::sqrt(sum / float(100) + 1e-05); // Y = Scale o InvStdDev (X - Mean) for (size_t axis_1 = 0; axis_1 < 100; axis_1++){ tensor_edge_updateRelu4output[axis_0 * 100 + axis_1] = tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normscale0[axis_1] * invStdDev * (tensor_edge_updateRelu4[axis_0 * 100 + axis_1] - mean) + tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkedge_blocklayer_normoffset0[axis_1]; } } } }; } namespace Node_Update{ struct Session { //--------- GNN_Update_Function---node_update // initialized (weights and constant) tensors std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normscale0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normscale0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normscale0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3b0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2w0 = std::vector(10000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2b0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normoffset0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normoffset0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normoffset0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1w0 = std::vector(10000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3w0 = std::vector(10000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1b0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0w0 = std::vector(50000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0b0.data(); // --- Positioning intermediate tensor memory --//--- declare the dynamic tensors float * tensor_node_updateRelu4 = nullptr; float * tensor_node_updateGemm4 = nullptr; float * tensor_node_updateRelu2 = nullptr; float * tensor_node_updateRelu4output = nullptr; float * tensor_node_updateGemm2 = nullptr; float * tensor_node_updateGemm1 = nullptr; float * tensor_node_updateRelu0 = nullptr; float * tensor_node_updateRelu1 = nullptr; float * tensor_node_updateGemm0 = nullptr; float * tensor_node_updateInputConcat = nullptr; //--- dynamic tensors pool std::vector fDynamicMemoryPool; // dynamic shape parameters size_t fNum_nodes; Session(std::string filename ="gnn_core.dat", size_t num_nodes = 5) { fNum_nodes = num_nodes; //--- reading weights from file std::ifstream f; f.open(filename); if (!f.is_open()) { throw std::runtime_error("tmva-sofie failed to open file " + filename + " for input weights"); } f.seekg(1489593); using TMVA::Experimental::SOFIE::ReadTensorFromStream; ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normscale0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normscale0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3b0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2w0", 10000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2b0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normoffset0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normoffset0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1w0", 10000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3w0", 10000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1b0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0w0", 50000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0b0", 100); f.close(); // dynamic tensor memory management std::vector dynamicTensorInfos; dynamicTensorInfos.reserve(10); dynamicTensorInfos.push_back( {0, 2, 4* (num_nodes * 500) }); // tensor_node_updateInputConcat dynamicTensorInfos.push_back( {1, 3, 4* (num_nodes * 100) }); // tensor_node_updateGemm0 dynamicTensorInfos.push_back( {2, 4, 4* (num_nodes * 100) }); // tensor_node_updateRelu0 dynamicTensorInfos.push_back( {3, 5, 4* (num_nodes * 100) }); // tensor_node_updateGemm1 dynamicTensorInfos.push_back( {4, 6, 4* (num_nodes * 100) }); // tensor_node_updateRelu1 dynamicTensorInfos.push_back( {5, 7, 4* (num_nodes * 100) }); // tensor_node_updateGemm2 dynamicTensorInfos.push_back( {6, 8, 4* (num_nodes * 100) }); // tensor_node_updateRelu2 dynamicTensorInfos.push_back( {7, 9, 4* (num_nodes * 100) }); // tensor_node_updateGemm4 dynamicTensorInfos.push_back( {8, 10, 4* (num_nodes * 100) }); // tensor_node_updateRelu4 auto memory_result = OrganizeMemory(dynamicTensorInfos); // allocating now the memory fDynamicMemoryPool = std::vector(memory_result.total_bytes); int idx = 0; tensor_node_updateInputConcat = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_node_updateGemm0 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_node_updateRelu0 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_node_updateGemm1 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_node_updateRelu1 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_node_updateGemm2 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_node_updateRelu2 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_node_updateGemm4 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); tensor_node_updateRelu4 = reinterpret_cast(fDynamicMemoryPool.data() + memory_result.offsets[idx++]); } std::vector infer(size_t num_nodes,float const* tensor_edge,float const* tensor_node,float const* tensor_global){ std::vector output_tensor_node_updateRelu4output(num_nodes * 100); if (num_nodes > fNum_nodes) { throw std::runtime_error("TMVA-SOFIE: dynamic input tensor shape parameter num_nodes exceeds the initialized maximum allowed shape."); } doInfer(*this, num_nodes,tensor_edge,tensor_node,tensor_global, output_tensor_node_updateRelu4output.data() ); output_tensor_node_updateRelu4output.resize(num_nodes * 100); return {output_tensor_node_updateRelu4output}; } inline void doInfer(Session const &session, size_t num_nodes,float const* tensor_edge,float const* tensor_node,float const* tensor_global, float *tensor_node_updateRelu4output ) { //--------- Concat op_0 --> node_updateInputConcat { num_nodes , 500 } for (size_t i0 = 0; i0 < num_nodes; ++i0) { int idxOut = 500*i0; int idxIn0 = 100*i0; for (size_t iC = 0; iC < 100; ++iC) { tensor_node_updateInputConcat[idxOut+iC] = tensor_edge[idxIn0+iC]; } idxOut += 100; int idxIn1 = 200*i0; for (size_t iC = 0; iC < 200; ++iC) { tensor_node_updateInputConcat[idxOut+iC] = tensor_node[idxIn1+iC]; } idxOut += 200; int idxIn2 = 200*i0; for (size_t iC = 0; iC < 200; ++iC) { tensor_node_updateInputConcat[idxOut+iC] = tensor_global[idxIn2+iC]; } } //--------- Gemm op_1 { num_nodes , 500 } * { 500 , 100 } -> { num_nodes , 100 } for (size_t j = 0; j < num_nodes; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_node_updateGemm0 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_node_updateGemm0, false, false, 100, num_nodes, 500, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_0w0, tensor_node_updateInputConcat, 1,nullptr); //------ RELU for (int id = 0; id < num_nodes * 100 ; id++){ tensor_node_updateRelu0[id] = ((tensor_node_updateGemm0[id] > 0 )? tensor_node_updateGemm0[id] : 0); } //--------- Gemm op_3 { num_nodes , 100 } * { 100 , 100 } -> { num_nodes , 100 } for (size_t j = 0; j < num_nodes; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_node_updateGemm1 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_node_updateGemm1, false, false, 100, num_nodes, 100, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_1w0, tensor_node_updateRelu0, 1,nullptr); //------ RELU for (int id = 0; id < num_nodes * 100 ; id++){ tensor_node_updateRelu1[id] = ((tensor_node_updateGemm1[id] > 0 )? tensor_node_updateGemm1[id] : 0); } //--------- Gemm op_5 { num_nodes , 100 } * { 100 , 100 } -> { num_nodes , 100 } for (size_t j = 0; j < num_nodes; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_node_updateGemm2 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_node_updateGemm2, false, false, 100, num_nodes, 100, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_2w0, tensor_node_updateRelu1, 1,nullptr); //------ RELU for (int id = 0; id < num_nodes * 100 ; id++){ tensor_node_updateRelu2[id] = ((tensor_node_updateGemm2[id] > 0 )? tensor_node_updateGemm2[id] : 0); } //--------- Gemm op_7 { num_nodes , 100 } * { 100 , 100 } -> { num_nodes , 100 } for (size_t j = 0; j < num_nodes; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_node_updateGemm4 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_node_updateGemm4, false, false, 100, num_nodes, 100, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blockmlplinear_3w0, tensor_node_updateRelu2, 1,nullptr); //------ RELU for (int id = 0; id < num_nodes * 100 ; id++){ tensor_node_updateRelu4[id] = ((tensor_node_updateGemm4[id] > 0 )? tensor_node_updateGemm4[id] : 0); } //---- Layer Normalization operator op_9 // Compute the mean for (size_t axis_0 = 0; axis_0 < num_nodes; axis_0++) { float mean = 0.; for (size_t axis_1 = 0; axis_1 < 100; axis_1++) { mean += tensor_node_updateRelu4[axis_0 * 100 + axis_1]; } mean /= float(100); // Compute the inverse Standard Deviation float sum = 0.; for (size_t axis_1 = 0; axis_1 < 100; axis_1++){ float tmp = tensor_node_updateRelu4[axis_0 * 100 + axis_1] - mean; sum += tmp*tmp; } float invStdDev = 1 / std::sqrt(sum / float(100) + 1e-05); // Y = Scale o InvStdDev (X - Mean) for (size_t axis_1 = 0; axis_1 < 100; axis_1++){ tensor_node_updateRelu4output[axis_0 * 100 + axis_1] = tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normscale0[axis_1] * invStdDev * (tensor_node_updateRelu4[axis_0 * 100 + axis_1] - mean) + tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networknode_blocklayer_normoffset0[axis_1]; } } } }; } namespace Global_Update{ struct Session { //--------- GNN_Update_Function---global_update // initialized (weights and constant) tensors std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3w0 = std::vector(10000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3b0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normscale0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normscale0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normscale0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2w0 = std::vector(10000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normoffset0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normoffset0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normoffset0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2b0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1w0 = std::vector(10000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1b0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0w0 = std::vector(40000); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0w0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0w0.data(); std::vector fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0b0 = std::vector(100); float * tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0b0 = fTensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0b0.data(); //--- Allocating session memory pool to be used for allocating intermediate tensors std::vector fIntermediateMemoryPool = std::vector(2000); // --- Positioning intermediate tensor memory -- // Allocating memory for intermediate tensor global_updateInputConcat with size 1600 bytes float* tensor_global_updateInputConcat = reinterpret_cast(fIntermediateMemoryPool.data() + 0); // Allocating memory for intermediate tensor global_updateGemm0 with size 400 bytes float* tensor_global_updateGemm0 = reinterpret_cast(fIntermediateMemoryPool.data() + 1600); // Allocating memory for intermediate tensor global_updateRelu0 with size 400 bytes float* tensor_global_updateRelu0 = reinterpret_cast(fIntermediateMemoryPool.data() + 1200); // Allocating memory for intermediate tensor global_updateGemm1 with size 400 bytes float* tensor_global_updateGemm1 = reinterpret_cast(fIntermediateMemoryPool.data() + 800); // Allocating memory for intermediate tensor global_updateRelu1 with size 400 bytes float* tensor_global_updateRelu1 = reinterpret_cast(fIntermediateMemoryPool.data() + 400); // Allocating memory for intermediate tensor global_updateGemm2 with size 400 bytes float* tensor_global_updateGemm2 = reinterpret_cast(fIntermediateMemoryPool.data() + 0); // Allocating memory for intermediate tensor global_updateRelu2 with size 400 bytes float* tensor_global_updateRelu2 = reinterpret_cast(fIntermediateMemoryPool.data() + 1600); // Allocating memory for intermediate tensor global_updateGemm4 with size 400 bytes float* tensor_global_updateGemm4 = reinterpret_cast(fIntermediateMemoryPool.data() + 1200); // Allocating memory for intermediate tensor global_updateRelu4 with size 400 bytes float* tensor_global_updateRelu4 = reinterpret_cast(fIntermediateMemoryPool.data() + 800); // Allocating memory for intermediate tensor global_updateRelu4output with size 400 bytes float* tensor_global_updateRelu4output = reinterpret_cast(fIntermediateMemoryPool.data() + 400); Session(std::string filename ="gnn_core.dat") { //--- reading weights from file std::ifstream f; f.open(filename); if (!f.is_open()) { throw std::runtime_error("tmva-sofie failed to open file " + filename + " for input weights"); } f.seekg(2567259); using TMVA::Experimental::SOFIE::ReadTensorFromStream; ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3w0", 10000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3b0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normscale0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normscale0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2w0", 10000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normoffset0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normoffset0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2b0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1w0", 10000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1b0", 100); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0w0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0w0", 40000); ReadTensorFromStream(f, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0b0, "tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0b0", 100); f.close(); } std::vector infer(float const* tensor_edge,float const* tensor_node,float const* tensor_global){ std::vector output_tensor_global_updateRelu4output(100); doInfer(*this, tensor_edge,tensor_node,tensor_global, output_tensor_global_updateRelu4output.data() ); return {output_tensor_global_updateRelu4output}; } inline void doInfer(Session const &session, float const* tensor_edge,float const* tensor_node,float const* tensor_global, float *tensor_global_updateRelu4output ) { //--------- Concat op_0 --> global_updateInputConcat { 1 , 400 } TMVA::Experimental::SOFIE::Copy(tensor_global_updateInputConcat, tensor_edge, 100); TMVA::Experimental::SOFIE::Copy(tensor_global_updateInputConcat + 100, tensor_node, 100); TMVA::Experimental::SOFIE::Copy(tensor_global_updateInputConcat + 100 + 100, tensor_global, 200); //--------- Gemm op_1 { 1 , 400 } * { 400 , 100 } -> { 1 , 100 } for (size_t j = 0; j < 1; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_global_updateGemm0 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_global_updateGemm0, false, false, 100, 1, 400, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_0w0, tensor_global_updateInputConcat, 1,nullptr); //------ RELU for (int id = 0; id < 100 ; id++){ tensor_global_updateRelu0[id] = ((tensor_global_updateGemm0[id] > 0 )? tensor_global_updateGemm0[id] : 0); } //--------- Gemm op_3 { 1 , 100 } * { 100 , 100 } -> { 1 , 100 } for (size_t j = 0; j < 1; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_global_updateGemm1 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_global_updateGemm1, false, false, 100, 1, 100, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_1w0, tensor_global_updateRelu0, 1,nullptr); //------ RELU for (int id = 0; id < 100 ; id++){ tensor_global_updateRelu1[id] = ((tensor_global_updateGemm1[id] > 0 )? tensor_global_updateGemm1[id] : 0); } //--------- Gemm op_5 { 1 , 100 } * { 100 , 100 } -> { 1 , 100 } for (size_t j = 0; j < 1; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_global_updateGemm2 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_global_updateGemm2, false, false, 100, 1, 100, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_2w0, tensor_global_updateRelu1, 1,nullptr); //------ RELU for (int id = 0; id < 100 ; id++){ tensor_global_updateRelu2[id] = ((tensor_global_updateGemm2[id] > 0 )? tensor_global_updateGemm2[id] : 0); } //--------- Gemm op_7 { 1 , 100 } * { 100 , 100 } -> { 1 , 100 } for (size_t j = 0; j < 1; j++) { size_t y_index = 100 * j; TMVA::Experimental::SOFIE::Copy(tensor_global_updateGemm4 + y_index, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3b0, 100); } TMVA::Experimental::SOFIE::Gemm_Call(tensor_global_updateGemm4, false, false, 100, 1, 100, 1, tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blockmlplinear_3w0, tensor_global_updateRelu2, 1,nullptr); //------ RELU for (int id = 0; id < 100 ; id++){ tensor_global_updateRelu4[id] = ((tensor_global_updateGemm4[id] > 0 )? tensor_global_updateGemm4[id] : 0); } //---- Layer Normalization operator op_9 // Compute the mean for (size_t axis_0 = 0; axis_0 < 1; axis_0++) { float mean = 0.; for (size_t axis_1 = 0; axis_1 < 100; axis_1++) { mean += tensor_global_updateRelu4[axis_0 * 100 + axis_1]; } mean /= float(100); // Compute the inverse Standard Deviation float sum = 0.; for (size_t axis_1 = 0; axis_1 < 100; axis_1++){ float tmp = tensor_global_updateRelu4[axis_0 * 100 + axis_1] - mean; sum += tmp*tmp; } float invStdDev = 1 / std::sqrt(sum / float(100) + 1e-05); // Y = Scale o InvStdDev (X - Mean) for (size_t axis_1 = 0; axis_1 < 100; axis_1++){ tensor_global_updateRelu4output[axis_0 * 100 + axis_1] = tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normscale0[axis_1] * invStdDev * (tensor_global_updateRelu4[axis_0 * 100 + axis_1] - mean) + tensor_EncodeProcessDecodeMLPGraphNetworkgraph_networkglobal_blocklayer_normoffset0[axis_1]; } } } }; } //--------- GNN_Aggregate_Function---Aggregate_by_Sum std::vector Aggregate_by_Sum(const int& num_features, const std::vector& inputs){ std::vector result(num_features,0); for(auto &it:inputs){ std::transform(result.begin(), result.end(), it, result.begin(), std::plus()); } return result; } struct Session { // Instantiating session objects for graph components Edge_Update::Session edge_update; Node_Update::Session node_update; Global_Update::Session global_update; std::vector fEdgeUpdates = std::vector(20*100); std::vector fNodeUpdates = std::vector(5*100); // input vectors for edge update std::vector fEdgeInputs = std::vector(20*200); std::vector fRecNodeInputs = std::vector(20*200); std::vector fSndNodeInputs = std::vector(20*200); std::vector fGlobInputs = std::vector(20*200); // input vectors for node update std::vector fNodeInputs = std::vector(5*200); std::vector fNodeEdgeAggregate = std::vector(5*200, 0); std::vector fNodeAggregateTemp; void infer(TMVA::Experimental::SOFIE::GNN_Data& input_graph){ // --- Edge Update --- size_t n_edges = input_graph.edge_data.GetShape()[0]; if (n_edges > 20) throw std::runtime_error("Number of input edges larger than 20" ); auto receivers = input_graph.edge_index.GetData(); auto senders = input_graph.edge_index.GetData() + n_edges; for (size_t k = 0; k < n_edges; k++) { std::copy(input_graph.edge_data.GetData() + k * 200, input_graph.edge_data.GetData() + (k + 1) * 200, fEdgeInputs.begin() + k * 200); std::copy(input_graph.node_data.GetData() + receivers[k] * 200, input_graph.node_data.GetData() + (receivers[k] + 1) * 200, fRecNodeInputs.begin() + k * 200); std::copy(input_graph.node_data.GetData() + senders[k] * 200, input_graph.node_data.GetData() + (senders[k] + 1) * 200, fSndNodeInputs.begin() + k * 200); std::copy(input_graph.global_data.GetData(), input_graph.global_data.GetData() + 200, fGlobInputs.begin() + k * 200); } fEdgeUpdates = edge_update.infer(n_edges,fEdgeInputs.data(), fRecNodeInputs.data(), fSndNodeInputs.data(), fGlobInputs.data()); // resize edge graph data since output feature size is not equal to input size input_graph.edge_data = input_graph.edge_data.Resize({n_edges, 100}); for (size_t k = 0; k < n_edges; k++) { std::copy(fEdgeUpdates.begin()+ k * 100, fEdgeUpdates.begin()+ (k+1) * 100,input_graph.edge_data.GetData() + k * 100); } // --- Node Update --- size_t n_nodes = input_graph.node_data.GetShape()[0]; for (size_t k = 0; k < n_nodes; k++) { std::copy(input_graph.node_data.GetData() + k * 200, input_graph.node_data.GetData() + (k + 1) * 200, fNodeInputs.begin() + k * 200); } std::fill(fNodeEdgeAggregate.begin(), fNodeEdgeAggregate.end(), 0.); // resize global vector feature to number of nodes if needed if (n_nodes > n_edges) { fGlobInputs.resize( n_nodes * 200); for (size_t k = n_edges; k < n_nodes; k++) std::copy(fGlobInputs.begin(), fGlobInputs.begin() + 200 , fGlobInputs.begin() + k * 200); } // aggregate edges going to a node for (size_t j = 0; j < n_nodes; j++) { std::vector edgesData; edgesData.reserve( int(n_edges/n_nodes) +1); for (size_t k = 0; k < n_edges; k++) { if (receivers[k] == j) edgesData.emplace_back(input_graph.edge_data.GetData() + k * 100); } fNodeAggregateTemp = Aggregate_by_Sum(100,edgesData); std::copy(fNodeAggregateTemp.begin(), fNodeAggregateTemp.end(), fNodeEdgeAggregate.begin() + 100 * j); } fNodeUpdates = node_update.infer(n_nodes,fNodeEdgeAggregate.data(),fNodeInputs.data(),fGlobInputs.data()); // resize node graph data since output feature size is not equal to input size input_graph.node_data = input_graph.node_data.Resize({n_nodes, 100}); for (size_t k = 0; k < n_nodes; k++) { std::copy(fNodeUpdates.begin()+ k * 100, fNodeUpdates.begin() + (k+1) * 100,input_graph.node_data.GetData() + k * 100); } std::vector allEdgesData; allEdgesData.reserve(n_edges); for (size_t k = 0; k < n_edges; k++) { allEdgesData.emplace_back(input_graph.edge_data.GetData() + k * 100); } std::vector allNodesData; allNodesData.reserve(n_nodes); for (size_t k = 0; k < n_nodes; k++) { allNodesData.emplace_back(input_graph.node_data.GetData() + k * 100); } // --- Global Update --- std::vector Edge_Global_Aggregate = Aggregate_by_Sum(100,allEdgesData); std::vector Node_Global_Aggregate = Aggregate_by_Sum(100,allNodesData); std::vector Global_Data = global_update.infer(Edge_Global_Aggregate.data(),Node_Global_Aggregate.data(),input_graph.global_data.GetData()); // resize global graph data since output feature size is not equal to input size input_graph.global_data = input_graph.global_data.Resize({100}); std::copy(Global_Data.begin(), Global_Data.end(), input_graph.global_data.GetData()); } }; } //TMVA_SOFIE_gnn_core #endif // TMVA_SOFIE_ROOT_TMVA_SOFIE_GNN_CORE