Logo ROOT   6.14/05
Reference Guide
TTreeProcessorMT.cxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Enric Tejedor, CERN 12/09/2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class ROOT::TTreeProcessorMT
13  \ingroup Parallelism
14  \brief A class to process the entries of a TTree in parallel.
15 
16 By means of its Process method, ROOT::TTreeProcessorMT provides a way to process the
17 entries of a TTree in parallel. When invoking TTreeProcessorMT::Process, the user
18 passes a function whose only parameter is a TTreeReader. The function iterates
19 on a subrange of entries by using that TTreeReader.
20 
21 The implementation of ROOT::TTreeProcessorMT parallelizes the processing of the subranges,
22 each corresponding to a cluster in the TTree. This is possible thanks to the use
23 of a ROOT::TThreadedObject, so that each thread works with its own TFile and TTree
24 objects.
25 */
26 
27 #include "TROOT.h"
29 #include "ROOT/TThreadExecutor.hxx"
30 
31 using namespace ROOT;
32 
33 namespace ROOT {
34 namespace Internal {
35 ////////////////////////////////////////////////////////////////////////
36 /// Return a vector of cluster boundaries for the given tree and files.
38 MakeClusters(const std::string &treeName, const std::vector<std::string> &fileNames)
39 {
40  // Note that as a side-effect of opening all files that are going to be used in the
41  // analysis once, all necessary streamers will be loaded into memory.
43  std::vector<EntryCluster> clusters;
44  std::vector<Long64_t> nEntries;
45  const auto nFileNames = fileNames.size();
46  Long64_t offset = 0ll;
47  for (auto i = 0u; i < nFileNames; ++i) {
48  std::unique_ptr<TFile> f(TFile::Open(fileNames[i].c_str())); // need TFile::Open to load plugins if need be
49  TTree *t = nullptr; // not a leak, t will be deleted by f
50  f->GetObject(treeName.c_str(), t);
51  auto clusterIter = t->GetClusterIterator(0);
52  Long64_t start = 0ll, end = 0ll;
53  const Long64_t entries = t->GetEntries();
54  nEntries.emplace_back(entries);
55  // Iterate over the clusters in the current file
56  while ((start = clusterIter()) < entries) {
57  end = clusterIter.GetNextEntry();
58  // Add the current file's offset to start and end to make them (chain) global
59  clusters.emplace_back(EntryCluster{start + offset, end + offset});
60  }
61  offset += entries;
62  }
63 
64  return std::make_pair(std::move(clusters), std::move(nEntries));
65 }
66 
67 ////////////////////////////////////////////////////////////////////////
68 /// Return a vector containing the number of entries of each file of each friend TChain
69 std::vector<std::vector<Long64_t>> GetFriendEntries(const std::vector<std::pair<std::string, std::string>> &friendNames,
70  const std::vector<std::vector<std::string>> &friendFileNames)
71 {
72  std::vector<std::vector<Long64_t>> friendEntries;
73  const auto nFriends = friendNames.size();
74  for (auto i = 0u; i < nFriends; ++i) {
75  std::vector<Long64_t> nEntries;
76  const auto &thisFriendName = friendNames[i].first;
77  const auto &thisFriendFiles = friendFileNames[i];
78  for (const auto &fname : thisFriendFiles) {
79  std::unique_ptr<TFile> f(TFile::Open(fname.c_str()));
80  TTree *t = nullptr; // owned by TFile
81  f->GetObject(thisFriendName.c_str(), t);
82  nEntries.emplace_back(t->GetEntries());
83  }
84  friendEntries.emplace_back(std::move(nEntries));
85  }
86 
87  return friendEntries;
88 }
89 
90 ////////////////////////////////////////////////////////////////////////
91 /// Return the full path of the tree
92 static std::string GetTreeFullPath(const TTree &tree)
93 {
94  // Case 1: this is a TChain: we get the name out of the first TChainElement
95  if (0 == strcmp("TChain", tree.ClassName())) {
96  auto &chain = dynamic_cast<const TChain&>(tree);
97  auto files = chain.GetListOfFiles();
98  if (files && 0 != files->GetEntries()) {
99  return files->At(0)->GetName();
100  }
101  }
102 
103  // Case 2: this is a TTree: we get the full path of it
104  if (auto motherDir = tree.GetDirectory()) {
105  std::string fullPath(motherDir->GetPath());
106  fullPath += "/";
107  fullPath += tree.GetName();
108  return fullPath;
109  }
110 
111  // We do our best and return the name of the tree
112  return tree.GetName();
113 }
114 
115 TTreeView::TTreeView(TTree& tree) : fTreeName(GetTreeFullPath(tree))
116 {
117  static const TClassRef clRefTChain("TChain");
118  if (clRefTChain == tree.IsA()) {
119  TObjArray* filelist = static_cast<TChain&>(tree).GetListOfFiles();
120  if (filelist->GetEntries() > 0) {
121  for (auto f : *filelist)
122  fFileNames.emplace_back(f->GetTitle());
123  StoreFriends(tree, false);
124  }
125  else {
126  auto msg = "The provided chain of files is empty, cannot process tree " + fTreeName;
127  throw std::runtime_error(msg);
128  }
129  }
130  else {
131  TFile *f = tree.GetCurrentFile();
132  if (f) {
133  fFileNames.emplace_back(f->GetName());
134  StoreFriends(tree, true);
135  }
136  else {
137  auto msg = "The specified TTree is not linked to any file, in-memory-only trees are not supported. Cannot process tree " + fTreeName;
138  throw std::runtime_error(msg);
139  }
140  }
141 }
142 
143 }
144 }
145 
146 ////////////////////////////////////////////////////////////////////////
147 /// Constructor based on a file name.
148 /// \param[in] filename Name of the file containing the tree to process.
149 /// \param[in] treename Name of the tree to process. If not provided,
150 /// the implementation will automatically search for a
151 /// tree in the file.
152 TTreeProcessorMT::TTreeProcessorMT(std::string_view filename, std::string_view treename) : treeView(filename, treename) {}
153 
154 ////////////////////////////////////////////////////////////////////////
155 /// Constructor based on a collection of file names.
156 /// \param[in] filenames Collection of the names of the files containing the tree to process.
157 /// \param[in] treename Name of the tree to process. If not provided,
158 /// the implementation will automatically search for a
159 /// tree in the collection of files.
160 TTreeProcessorMT::TTreeProcessorMT(const std::vector<std::string_view> &filenames, std::string_view treename) : treeView(filenames, treename) {}
161 
162 ////////////////////////////////////////////////////////////////////////
163 /// Constructor based on a TTree.
164 /// \param[in] tree Tree or chain of files containing the tree to process.
166 
167 ////////////////////////////////////////////////////////////////////////
168 /// Constructor based on a TTree and a TEntryList.
169 /// \param[in] tree Tree or chain of files containing the tree to process.
170 /// \param[in] entries List of entry numbers to process.
171 TTreeProcessorMT::TTreeProcessorMT(TTree &tree, TEntryList &entries) : treeView(tree, entries) {}
172 
173 //////////////////////////////////////////////////////////////////////////////
174 /// Process the entries of a TTree in parallel. The user-provided function
175 /// receives a TTreeReader which can be used to iterate on a subrange of
176 /// entries
177 /// ~~~{.cpp}
178 /// TTreeProcessorMT::Process([](TTreeReader& readerSubRange) {
179 /// // Select branches to read
180 /// while (readerSubRange.Next()) {
181 /// // Use content of current entry
182 /// }
183 /// });
184 /// ~~~
185 /// The user needs to be aware that each of the subranges can potentially
186 /// be processed in parallel. This means that the code of the user function
187 /// should be thread safe.
188 ///
189 /// \param[in] func User-defined function that processes a subrange of entries
// NOTE(review): the function signature (listing line 190) is absent from this extract.
// The declaration index at the end of the listing gives it as
// `void Process(std::function<void(TTreeReader &)> func)` -- restore from upstream.
191 {
192  // Enable this IMT use case (activate its locks)
// NOTE(review): listing line 193, the statement the comment above describes, is absent
// from this extract -- restore from upstream.
194 
// Open every file once to collect the cluster boundaries (.first) and the per-file
// entry counts (.second) of the tree handled by this processor.
195  const auto clustersAndEntries = ROOT::Internal::MakeClusters(treeView->GetTreeName(), treeView->GetFileNames());
196  const auto &clusters = clustersAndEntries.first;
197  const auto &entries = clustersAndEntries.second;
198 
199  const auto friendEntries =
// NOTE(review): the initializer of friendEntries (listing line 200) is absent from this
// extract. Presumably a call to ROOT::Internal::GetFriendEntries with the view's friend
// names and friend file names -- confirm against upstream.
201 
// Task executed once per cluster. It captures the per-file entry counts by reference,
// so they must stay alive until the Foreach call below has returned.
202  auto mapFunction = [this, &func, &entries, &friendEntries](const ROOT::Internal::EntryCluster &c) {
203  // This task will operate with the tree that contains start
204  treeView->PushTaskFirstEntry(c.start);
205 
// Obtain a reader (and its entry list) for the range [c.start, c.end) handed to this
// task -- presumably half-open, TODO confirm -- and let the user function iterate on it.
206  std::unique_ptr<TTreeReader> reader;
207  std::unique_ptr<TEntryList> elist;
208  std::tie(reader, elist) = treeView->GetTreeReader(c.start, c.end, entries, friendEntries);
209  func(*reader);
210 
211  // In case of task interleaving, we need to load here the tree of the parent task
// NOTE(review): listing line 212 is absent from this extract. Presumably the matching
// treeView->PopTaskFirstEntry() call -- confirm against upstream.
213  };
214 
215  // Assume number of threads has been initialized via ROOT::EnableImplicitMT
// Apply mapFunction to every element of clusters through the thread executor.
216  TThreadExecutor pool;
217  pool.Foreach(mapFunction, clusters);
218 }
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
const std::vector< std::string > & GetFileNames() const
Get the filenames for this view.
An array of TObjects.
Definition: TObjArray.h:37
long long Long64_t
Definition: RtypesCore.h:69
TTreeReader is a simple, robust and fast interface to read values from a TTree, TChain or TNtuple...
Definition: TTreeReader.h:43
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
#define f(i)
Definition: RSha256.hxx:104
TObject * At(Int_t idx) const
Definition: TObjArray.h:165
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3976
TObjArray * GetListOfFiles() const
Definition: TChain.h:107
A cluster of entries.
std::string GetTreeName() const
Get the name of the tree of this view.
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
const std::vector< NameAlias > & GetFriendNames() const
std::string fTreeName
Name of the tree.
std::vector< std::string > fFileNames
Names of the files.
void StoreFriends(const TTree &tree, bool isTree)
Get and store the names, aliases and file names of the friends of the tree.
This class provides a simple interface to execute the same task multiple times in parallel...
void PopTaskFirstEntry()
Restore the tree of the previous loaded entry, if any.
std::vector< std::vector< Long64_t > > GetFriendEntries(const std::vector< std::pair< std::string, std::string >> &friendNames, const std::vector< std::vector< std::string >> &friendFileNames)
Return a vector containing the number of entries of each file of each friend TChain.
TreeReaderEntryListPair GetTreeReader(Long64_t start, Long64_t end, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t >> &friendEntries)
Get a TTreeReader for the current tree of this view.
const std::vector< std::vector< std::string > > & GetFriendFileNames() const
void PushTaskFirstEntry(Long64_t entry)
Push a new loaded entry to the stack.
void Process(std::function< void(TTreeReader &)> func)
Process the entries of a TTree in parallel.
ClustersAndEntries MakeClusters(const std::string &treename, const std::vector< std::string > &filenames)
Return a vector of cluster boundaries for the given tree and files.
basic_string_view< char > string_view
Definition: RStringView.hxx:35
ROOT::TThreadedObject< ROOT::Internal::TTreeView > treeView
! Thread-local TreeViews
std::pair< std::vector< EntryCluster >, std::vector< Long64_t > > ClustersAndEntries
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:29
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
Int_t GetEntries() const
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:522
#define c(i)
Definition: RSha256.hxx:101
Definition: tree.py:1
TTreeProcessorMT(std::string_view filename, std::string_view treename="")
Constructor based on a file name.
virtual const char * GetName() const
Returns name of object.
Definition: TObject.cxx:357
A List of entry numbers in a TTree or TChain.
Definition: TEntryList.h:25
TTreeView(std::string_view fn, std::string_view tn)
Constructor based on a file name.
static std::string GetTreeFullPath(const TTree &tree)
Return the full path of the tree.