Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ReadSpeed.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, David Poulton 2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOTREADSPEED
12#define ROOTREADSPEED
13
14#include <TFile.h>
15
16#include <string>
17#include <vector>
18#include <regex>
19
20namespace ReadSpeed {
21
22struct Data {
23 /// Either a single tree name common for all files, or one tree name per file.
24 std::vector<std::string> fTreeNames;
25 /// List of input files.
26 std::vector<std::string> fFileNames;
27 /// Branches to read.
28 std::vector<std::string> fBranchNames;
29 /// If the branch names should use regex matching.
30 bool fUseRegex = false;
31};
32
33struct Result {
34 /// Real time spent reading and decompressing all data, in seconds.
35 double fRealTime;
36 /// CPU time spent reading and decompressing all data, in seconds.
37 double fCpuTime;
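38 /// Real time spent preparing the multi-thread workload.
39 double fMTSetupRealTime;
40 /// CPU time spent preparing the multi-thread workload.
41 double fMTSetupCpuTime;
42 /// Number of uncompressed bytes read in total from TTree branches.
43 ULong64_t fUncompressedBytesRead;
44 /// Number of compressed bytes read in total from the TFiles.
45 ULong64_t fCompressedBytesRead;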
46 /// Size of ROOT's thread pool for the run (0 indicates a single-thread run with no thread pool present).
47 unsigned int fThreadPoolSize;
48};
49
50struct EntryRange {
51 Long64_t fStart = -1;
52 Long64_t fEnd = -1;
53};
54
55struct ByteData {
56 ULong64_t fUncompressedBytesRead;
57 ULong64_t fCompressedBytesRead;
58};
59
60struct ReadSpeedRegex {
61 std::string text;
62 std::regex regex;
63
64 bool operator<(const ReadSpeedRegex &other) const { return text < other.text; }
65};
66
67std::vector<std::string> GetMatchingBranchNames(const std::string &fileName, const std::string &treeName,
68 const std::vector<ReadSpeedRegex> &regexes);
69
70// Read branches listed in branchNames in tree treeName from the given file, return the byte counts read as a ByteData.
71ByteData ReadTree(TFile *file, const std::string &treeName, const std::vector<std::string> &branchNames,
72 EntryRange range = {-1, -1});
73
74Result EvalThroughputST(const Data &d);
75
76// Return a vector of EntryRanges per file, i.e. a vector of vectors of EntryRanges with outer size equal to
77// d.fFileNames.
78std::vector<std::vector<EntryRange>> GetClusters(const Data &d);
79
80// Mimic the logic of TTreeProcessorMT::MakeClusters: merge entry ranges together such that we
81// run around TTreeProcessorMT::GetTasksPerWorkerHint tasks per worker thread.
82// TODO it would be better to expose TTreeProcessorMT's actual logic and call the exact same method from here
83std::vector<std::vector<EntryRange>>
84MergeClusters(std::vector<std::vector<EntryRange>> &&clusters, unsigned int maxTasksPerFile);
85
86Result EvalThroughputMT(const Data &d, unsigned nThreads);
87
88Result EvalThroughput(const Data &d, unsigned nThreads);
89
90} // namespace ReadSpeed
91
92#endif // ROOTREADSPEED
#define d(i)
Definition RSha256.hxx:102
long long Long64_t
Definition RtypesCore.h:69
unsigned long long ULong64_t
Definition RtypesCore.h:70
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
Result EvalThroughputST(const Data &d)
std::vector< std::string > GetMatchingBranchNames(const std::string &fileName, const std::string &treeName, const std::vector< ReadSpeedRegex > &regexes)
Definition ReadSpeed.cxx:37
std::vector< std::vector< EntryRange > > GetClusters(const Data &d)
Result EvalThroughputMT(const Data &d, unsigned nThreads)
Result EvalThroughput(const Data &d, unsigned nThreads)
std::vector< std::vector< EntryRange > > MergeClusters(std::vector< std::vector< EntryRange > > &&clusters, unsigned int maxTasksPerFile)
ByteData ReadTree(TFile *file, const std::string &treeName, const std::vector< std::string > &branchNames, EntryRange range={-1, -1})
ULong64_t fUncompressedBytesRead
Definition ReadSpeed.hxx:56
ULong64_t fCompressedBytesRead
Definition ReadSpeed.hxx:57
std::vector< std::string > fFileNames
List of input files.
Definition ReadSpeed.hxx:26
bool fUseRegex
If the branch names should use regex matching.
Definition ReadSpeed.hxx:30
std::vector< std::string > fBranchNames
Branches to read.
Definition ReadSpeed.hxx:28
std::vector< std::string > fTreeNames
Either a single tree name common for all files, or one tree name per file.
Definition ReadSpeed.hxx:24
bool operator<(const ReadSpeedRegex &other) const
Definition ReadSpeed.hxx:64
double fCpuTime
CPU time spent reading and decompressing all data, in seconds.
Definition ReadSpeed.hxx:37
double fMTSetupRealTime
Real time spent preparing the multi-thread workload.
Definition ReadSpeed.hxx:39
ULong64_t fCompressedBytesRead
Number of compressed bytes read in total from the TFiles.
Definition ReadSpeed.hxx:45
ULong64_t fUncompressedBytesRead
Number of uncompressed bytes read in total from TTree branches.
Definition ReadSpeed.hxx:43
double fRealTime
Real time spent reading and decompressing all data, in seconds.
Definition ReadSpeed.hxx:35
double fMTSetupCpuTime
CPU time spent preparing the multi-thread workload.
Definition ReadSpeed.hxx:41
unsigned int fThreadPoolSize
Size of ROOT's thread pool for the run (0 indicates a single-thread run with no thread pool present).
Definition ReadSpeed.hxx:47