Logo ROOT   6.12/07
Reference Guide
TTreeProcessorMT.hxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Enric Tejedor, CERN 12/09/2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_TTreeProcessorMT
13 #define ROOT_TTreeProcessorMT
14 
15 #include "TKey.h"
16 #include "TTree.h"
17 #include "TFile.h"
18 #include "TChain.h"
19 #include "TTreeReader.h"
20 #include "TError.h"
21 #include "TEntryList.h"
22 #include "TFriendElement.h"
23 #include "ROOT/TThreadedObject.hxx"
24 
25 #include <string.h>
26 #include <functional>
27 #include <vector>
28 
29 
30 /** \class TTreeView
31  \brief A helper class that encapsulates a file and a tree.
32 
33 A helper class that encapsulates a TFile and a TTree, along with their names.
34 It is used together with TTreeProcessorMT and ROOT::TThreadedObject, so that
35 in the TTreeProcessorMT::Process method each thread can work on its own
36 <TFile,TTree> pair.
37 
38 This class can also be used with a collection of file names or a TChain, in case
39 the tree is stored in more than one file. A view will always contain only the
40 current (active) tree and file objects.
41 
42 A copy constructor is defined for TTreeView to work with ROOT::TThreadedObject.
43 The latter makes a copy of a model object every time a new thread accesses
44 the threaded object.
45 */
46 
47 namespace ROOT {
48  namespace Internal {
49 
50  /// A cluster of entries as seen by TTreeView
51  struct TreeViewCluster {
54  };
55 
56  class TTreeView {
57  private:
58  typedef std::pair<std::string, std::string> NameAlias;
59 
60  // NOTE: fFriends must come before fChain to be deleted after it, see ROOT-9281 for more details
61  std::vector<std::unique_ptr<TChain>> fFriends; ///< Friends of the tree/chain
62  std::unique_ptr<TChain> fChain; ///< Chain on which to operate
63  std::vector<std::string> fFileNames; ///< Names of the files
64  std::string fTreeName; ///< Name of the tree
65  TEntryList fEntryList; ///< Entry numbers to be processed
66  std::vector<Long64_t> fLoadedEntries; ///<! Per-task loaded entries (for task interleaving)
67  std::vector<NameAlias> fFriendNames; ///< <name,alias> pairs of the friends of the tree/chain
68  std::vector<std::vector<std::string>> fFriendFileNames; ///< Names of the files where friends are stored
69 
70 ////////////////////////////////////////////////////////////////////////////////
71 /// Initialize TTreeView.
 ///
 /// Creates fChain from fTreeName and adds every entry of fFileNames to it, then
 /// creates one friend chain per entry of fFriendNames (filled from the matching
 /// entry of fFriendFileNames) and attaches it to fChain under the stored alias.
 /// If fTreeName is empty, the keys of the first file are scanned and the first
 /// key whose class name is exactly "TTree" provides the tree name.
 /// \throws std::runtime_error if fTreeName is empty and no such key is found.
 ///
 /// NOTE(review): this listing jumps from line 75 to 77 and from 95 to 97, so two
 /// statements of the function are not visible here.
72 void Init()
73 {
74 // If the tree name is empty, look for a tree in the file
75 if (fTreeName.empty()) {
 // Only the first file name is inspected.
 // NOTE(review): f is dereferenced without a null check; presumably TFile::Open
 // can return a null pointer when the file cannot be opened -- confirm.
77 std::unique_ptr<TFile> f(TFile::Open(fFileNames[0].c_str()));
78 TIter next(f->GetListOfKeys());
79 while (TKey *key = (TKey*)next()) {
80 const char *className = key->GetClassName();
81 if (strcmp(className, "TTree") == 0) {
82 fTreeName = key->GetName();
83 break;
84 }
85 }
86 if (fTreeName.empty()) {
87 auto msg = "Cannot find any tree in file " + fFileNames[0];
88 throw std::runtime_error(msg);
89 }
90 }
91 
 // Replace any previously held chain with a new one named after the tree.
92 fChain.reset(new TChain(fTreeName.c_str()));
93 for (auto &fn : fFileNames) {
94 fChain->Add(fn.c_str());
95 }
97 
 // friendNum walks fFriendFileNames in step with fFriendNames.
98 auto friendNum = 0u;
99 for (auto &na : fFriendNames) {
100 auto &name = na.first;
101 auto &alias = na.second;
102 
103 // Build a friend chain
104 TChain *frChain = new TChain(name.c_str());
105 auto &fileNames = fFriendFileNames[friendNum];
106 for (auto &fn : fileNames)
107 frChain->Add(fn.c_str());
108 
109 // Make it friends with the main chain
 // fFriends stores the pointer in a std::unique_ptr, so it owns frChain from here on.
110 fFriends.emplace_back(frChain);
111 fChain->AddFriend(frChain, alias.c_str());
112 
113 ++friendNum;
114 }
115 }
116 
117 ////////////////////////////////////////////////////////////////////////////////
118 /// Get and store the names, aliases and file names of the friends of the tree.
 /// \param[in] tree Tree whose list of friends is read. Nothing is stored if the
 ///            list pointer is null.
 /// \param[in] isTree If true, each friend contributes the name of its current
 ///            file only; otherwise each friend is cast to TChain and the titles
 ///            of the elements in its list of files are stored.
 ///
 /// For every friend one <name,alias> pair is appended to fFriendNames (the alias
 /// is the empty string when GetFriendAlias returns null) and one vector of file
 /// names is appended to fFriendFileNames, so both containers stay index-aligned.
119 void StoreFriends(const TTree &tree, bool isTree)
120 {
121 auto friends = tree.GetListOfFriends();
122 if (!friends)
123 return;
124 
125 for (auto fr : *friends) {
126 auto frTree = static_cast<TFriendElement *>(fr)->GetTree();
127 
128 // Check if friend tree has an alias
129 auto realName = frTree->GetName();
130 auto alias = tree.GetFriendAlias(frTree);
131 if (alias) {
132 fFriendNames.emplace_back(std::make_pair(realName, std::string(alias)));
133 } else {
134 fFriendNames.emplace_back(std::make_pair(realName, ""));
135 }
136 
137 // Store the file names of the friend tree
138 fFriendFileNames.emplace_back();
139 auto &fileNames = fFriendFileNames.back();
140 if (isTree) {
 // NOTE(review): f is used without a null check; presumably GetCurrentFile()
 // can return null for a friend that is not attached to a file -- confirm.
141 auto f = frTree->GetCurrentFile();
142 fileNames.emplace_back(f->GetName());
143 } else {
 // Unchecked downcast: this branch relies on the friend really being a TChain.
144 auto frChain = static_cast<TChain *>(frTree);
145 for (auto f : *(frChain->GetListOfFiles())) {
146 fileNames.emplace_back(f->GetTitle());
147 }
148 }
149 }
150 }
151 
152  public:
153  //////////////////////////////////////////////////////////////////////////
154  /// Constructor based on a file name.
155  /// \param[in] fn Name of the file containing the tree to process.
156  /// \param[in] tn Name of the tree to process. If not provided,
157  /// the implementation will automatically search for a
158  /// tree in the file.
160  {
161  fFileNames.emplace_back(fn);
162  Init();
163  }
164 
165 //////////////////////////////////////////////////////////////////////////
166 /// Constructor based on a collection of file names.
167 /// \param[in] fns Collection of file names containing the tree to process.
168 /// \param[in] tn Name of the tree to process. If not provided,
169 /// the implementation will automatically search for a
170 /// tree in the first file of the collection (see Init()).
 /// \throws std::runtime_error if `fns` is empty.
171 TTreeView(const std::vector<std::string_view>& fns, std::string_view tn) : fTreeName(tn)
172 {
173 if (fns.size() > 0) {
 // Each string_view is converted into an owned std::string in fFileNames.
174 for (auto& fn : fns)
175 fFileNames.emplace_back(fn);
176 Init();
177 }
178 else {
179 auto msg = "The provided list of file names is empty, cannot process tree " + fTreeName;
180 throw std::runtime_error(msg);
181 }
182 }
183 
184 //////////////////////////////////////////////////////////////////////////
185 /// Constructor based on a TTree.
186 /// \param[in] tree Tree or chain of files containing the tree to process.
 /// \throws std::runtime_error if `tree` is handled as a TChain and its list of
 ///         files is empty, or if it is handled as a plain tree and has no
 ///         current file.
 ///
 /// The tree name is taken from tree.GetName(). The file names and the friend
 /// information (StoreFriends) are recorded before Init() is called.
187 TTreeView(TTree& tree) : fTreeName(tree.GetName())
188 {
 // The chain branch is taken only when tree.IsA() compares equal to the
 // TClassRef for "TChain"; everything else goes through the plain-tree branch.
189 static const TClassRef clRefTChain("TChain");
190 if (clRefTChain == tree.IsA()) {
191 TObjArray* filelist = dynamic_cast<TChain&>(tree).GetListOfFiles();
192 if (filelist->GetEntries() > 0) {
 // For a chain, the file name is read from the title of each list element.
193 for (auto f : *filelist)
194 fFileNames.emplace_back(f->GetTitle());
195 StoreFriends(tree, false);
196 Init();
197 }
198 else {
199 auto msg = "The provided chain of files is empty, cannot process tree " + fTreeName;
200 throw std::runtime_error(msg);
201 }
202 }
203 else {
 // For a plain tree, the single file name comes from the tree's current file.
204 TFile *f = tree.GetCurrentFile();
205 if (f) {
206 fFileNames.emplace_back(f->GetName());
207 StoreFriends(tree, true);
208 Init();
209 }
210 else {
211 auto msg = "The specified TTree is not linked to any file, in-memory-only trees are not supported. Cannot process tree " + fTreeName;
212 throw std::runtime_error(msg);
213 }
214 }
215 }
216 
217  //////////////////////////////////////////////////////////////////////////
218  /// Constructor based on a TTree and a TEntryList.
219  /// \param[in] tree Tree or chain of files containing the tree to process.
220  /// \param[in] entries List of entry numbers to process.
222  {
223  Long64_t numEntries = entries.GetN();
224  for (Long64_t i = 0; i < numEntries; ++i) {
225  fEntryList.Enter(entries.GetEntry(i));
226  }
227  }
228 
229 //////////////////////////////////////////////////////////////////////////
230 /// Copy constructor.
231 /// \param[in] view Object to copy.
 ///
 /// Copies the tree name, the entry list, the file names, the friend
 /// <name,alias> pairs and the friend file names. fChain and fFriends are not
 /// copied: Init() builds new chains for this object from the copied names.
 /// fLoadedEntries is not copied either and starts out empty.
232 TTreeView(const TTreeView &view) : fTreeName(view.fTreeName), fEntryList(view.fEntryList)
233 {
234 for (auto& fn : view.fFileNames)
235 fFileNames.emplace_back(fn);
236 
237 for (auto &fn : view.fFriendNames)
238 fFriendNames.emplace_back(fn);
239 
 // Element-wise copy of the per-friend file name lists, preserving their order.
240 for (auto &ffn : view.fFriendFileNames) {
241 fFriendFileNames.emplace_back();
242 auto &fileNames = fFriendFileNames.back();
243 for (auto &name : ffn) {
244 fileNames.emplace_back(name);
245 }
246 }
247 
248 Init();
249 }
250 
251  //////////////////////////////////////////////////////////////////////////
252  /// Get a TTreeReader for the current tree of this view.
253  using TreeReaderEntryListPair = std::pair<std::unique_ptr<TTreeReader>, std::unique_ptr<TEntryList>>;
255  {
256  std::unique_ptr<TTreeReader> reader;
257  std::unique_ptr<TEntryList> elist;
258  if (fEntryList.GetN() > 0) {
259  // TEntryList and SetEntriesRange do not work together (the former has precedence).
260  // We need to construct a TEntryList that contains only those entry numbers
261  // in our desired range.
262  elist.reset(new TEntryList);
263  Long64_t entry = fEntryList.GetEntry(0);
264  do {
265  if (entry >= start && entry < end) // TODO can quit this loop early when entry >= end
266  elist->Enter(entry);
267  } while ((entry = fEntryList.Next()) >= 0);
268 
269  reader.reset(new TTreeReader(fChain.get(), elist.get()));
270  } else {
271  // If no TEntryList is involved we can safely set the range in the reader
272  reader.reset(new TTreeReader(fChain.get()));
273  fChain->LoadTree(start - 1);
274  reader->SetEntriesRange(start, end);
275  }
276 
277  return std::make_pair(std::move(reader), std::move(elist));
278  }
279 
280 //////////////////////////////////////////////////////////////////////////
281 /// Get the filenames for this view.
 /// \return Const reference to fFileNames (no copy is made).
282 const std::vector<std::string> &GetFileNames() const
283 {
284 return fFileNames;
285 }
286 
287 //////////////////////////////////////////////////////////////////////////
288 /// Get the name of the tree of this view.
 /// \return A copy of fTreeName (returned by value).
289 std::string GetTreeName() const
290 {
291 return fTreeName;
292 }
293 
294 //////////////////////////////////////////////////////////////////////////
295 /// Push a new loaded entry to the stack.
 /// \param[in] entry Entry number appended to the back of fLoadedEntries.
296 void PushLoadedEntry(Long64_t entry) { fLoadedEntries.push_back(entry); }
297 
298  //////////////////////////////////////////////////////////////////////////
299  /// Restore the tree of the previous loaded entry, if any.
301  {
302  fLoadedEntries.pop_back();
303  if (fLoadedEntries.size() > 0) {
304  fChain->LoadTree(fLoadedEntries.back());
305  }
306  }
307  };
308  } // End of namespace Internal
309 
310 
312  private:
314 
315  std::vector<ROOT::Internal::TreeViewCluster> MakeClusters();
316  public:
317  TTreeProcessorMT(std::string_view filename, std::string_view treename = "");
318  TTreeProcessorMT(const std::vector<std::string_view>& filenames, std::string_view treename = "");
321 
322  void Process(std::function<void(TTreeReader&)> func);
323 
324  };
325 
326 } // End of namespace ROOT
327 
328 #endif // defined ROOT_TTreeProcessorMT
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:145
const std::vector< std::string > & GetFileNames() const
Get the filenames for this view.
An array of TObjects.
Definition: TObjArray.h:37
virtual Long64_t Next()
Return the next non-zero entry index (next after fLastIndexQueried) this function is faster than GetE...
Definition: TEntryList.cxx:890
TTreeView(const std::vector< std::string_view > &fns, std::string_view tn)
Constructor based on a collection of file names.
long long Long64_t
Definition: RtypesCore.h:69
virtual Long64_t GetN() const
Definition: TEntryList.h:75
TTreeReader is a simple, robust and fast interface to read values from a TTree, TChain or TNtuple...
Definition: TTreeReader.h:43
basic_string_view< char > string_view
Definition: RStringView.h:35
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
std::pair< std::unique_ptr< TTreeReader >, std::unique_ptr< TEntryList > > TreeReaderEntryListPair
Get a TTreeReader for the current tree of this view.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
virtual TList * GetListOfFriends() const
Definition: TTree.h:409
A cluster of entries as seen by TTreeView.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3950
std::pair< std::string, std::string > NameAlias
void Init(TClassEdit::TInterpreterLookupHelper *helper)
Definition: TClassEdit.cxx:119
std::string GetTreeName() const
Get the name of the tree of this view.
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
virtual Long64_t LoadTree(Long64_t entry)
Find the tree which contains entry, and set it as the current tree.
Definition: TChain.cxx:1260
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
std::string fTreeName
Name of the tree.
void Init()
Initialize TTreeView.
std::vector< std::string > fFileNames
Names of the files.
TTreeView(const TTreeView &view)
Copy constructor.
void StoreFriends(const TTree &tree, bool isTree)
Get and store the names, aliases and file names of the friends of the tree.
EEntryStatus SetEntriesRange(Long64_t beginEntry, Long64_t endEntry)
Sets the entry that Next() will stop iteration on.
TTreeView(TTree &tree, TEntryList &entries)
Constructor based on a TTree and a TEntryList.
void RestoreLoadedEntry()
Restore the tree of the previous loaded entry, if any.
TreeReaderEntryListPair GetTreeReader(Long64_t start, Long64_t end)
TFile * GetCurrentFile() const
Return pointer to the current file.
Definition: TTree.cxx:5172
virtual Long64_t GetEntry(Int_t index)
Return the number of the entry #index of this TEntryList in the TTree or TChain See also Next()...
Definition: TEntryList.cxx:657
TTreeView(TTree &tree)
Constructor based on a TTree.
if object destructor must call RecursiveRemove()
Definition: TObject.h:60
std::unique_ptr< TChain > fChain
Chain on which to operate.
virtual TFriendElement * AddFriend(const char *chainname, const char *dummy="")
Add a TFriendElement to the list of friends of this chain.
Definition: TChain.cxx:631
ROOT::TThreadedObject< ROOT::Internal::TTreeView > treeView
! Thread-local TreeViews
virtual Bool_t Enter(Long64_t entry, TTree *tree=0)
Add entry #entry to the list.
Definition: TEntryList.cxx:562
std::vector< Long64_t > fLoadedEntries
! Per-task loaded entries (for task interleaving)
void PushLoadedEntry(Long64_t entry)
Push a new loaded entry to the stack.
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:29
virtual TList * GetListOfKeys() const
A TFriendElement TF describes a TTree object TF in a file.
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
Int_t GetEntries() const
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:522
Definition: tree.py:1
A TTree object has a header with a name and a title.
Definition: TTree.h:70
#define gDirectory
Definition: TDirectory.h:213
void ResetBit(UInt_t f)
Definition: TObject.h:171
std::vector< NameAlias > fFriendNames
<name,alias> pairs of the friends of the tree/chain
A List of entry numbers in a TTree or TChain.
Definition: TEntryList.h:25
A class to process the entries of a TTree in parallel.
TEntryList fEntryList
Entry numbers to be processed.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where friends are stored.
char name[80]
Definition: TGX11.cxx:109
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:221
virtual const char * GetFriendAlias(TTree *) const
If the 'tree' is a friend, this method returns its alias name.
Definition: TTree.cxx:5714
TTreeView(std::string_view fn, std::string_view tn)
Constructor based on a file name.
std::vector< std::unique_ptr< TChain > > fFriends
Friends of the tree/chain.