Logo ROOT   6.08/07
Reference Guide
TSimpleAnalysis.cxx
Go to the documentation of this file.
1 // @(#)root/treeplayer:$Id$
2 // Author: Luca Giommi 22/08/16
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #include "TSimpleAnalysis.h"
13 
14 #include "TFile.h"
15 #include "TChain.h"
16 #include "TChainElement.h"
17 #include "TH1.h"
18 #include "TError.h"
19 #include "TKey.h"
20 #ifdef R__USE_IMT
21 #include "ROOT/TThreadExecutor.hxx"
22 #endif
23 #include "TROOT.h"
24 
25 #include <string>
26 #include <fstream>
27 #include <vector>
28 #include <map>
29 #include <iostream>
30 
31 /** \class TSimpleAnalysis
32 
33 A TSimpleAnalysis object creates histograms from a TChain. These histograms
34 are stored to an output file. The histogrammed (TTreeFormula) expressions,
35 their cuts, the input and output files are configured through a simple config
36 file that allows comments starting with '#'.
37 Here is an example of a configuration file:
38 ```
39 # This is an example of configuration file
40 file_output.root #the output file in which histograms are stored
41 
42 # The next line has the name of the tree of the input data. It is
43 # optional if there is exactly one tree in the first input file.
44 ntuple #name of the input tree
45 
46 # The lines of the next block correspond to .root input files that
47 # contain the tree
48 hsimple1.root #first .root input file
49 hsimple2.root #second .root input file
50 
51 # The next block is composed by lines that allow to configure the
52 # histograms. They have the following syntax:
53 # NAME = EXPRESSION if CUT
54 # which corresponds to chain->Draw("EXPRESSION >> NAME", "CUT")
55 # i.e. it will create a histogram called NAME and store it in
56 # file_output.root.
57 # "if CUT" is optional
58 hpx=px if px<-3 #first histogram
59 hpxpy=px:py #second histogram
60 
61 # End of the configuration file
62 ```
63 The rootdrawtree script makes it possible to use this class directly from
64 the command line of a shell such as bash.
65 */
66 
////////////////////////////////////////////////////////////////////////////////
/// Delete comments, leading and trailing white spaces in a string.
///
/// Everything from the first '#' up to the end of the string is treated as a
/// comment and dropped. Blanks, tabs and line-ending characters ('\r', '\n')
/// are then stripped from both ends, so that a configuration file saved with
/// CRLF line endings does not leave a stray '\r' glued to a file, tree or
/// histogram name. White space inside the remaining text is kept.
///
/// \param[in,out] line - line read from the input file; modified in place and
///                       left empty if it holds only white space and/or a
///                       comment

static void DeleteCommentsAndSpaces(std::string& line)
{
   static const char* const kWhiteSpace = " \t\r\n";

   // Delete comments
   const std::size_t comment = line.find('#');
   if (comment != std::string::npos)
      line.erase(comment);

   // Delete trailing spaces; nothing but white space left means an empty line
   const std::size_t lastNotSpace = line.find_last_not_of(kWhiteSpace);
   if (lastNotSpace == std::string::npos) {
      line.clear();
      return;
   }
   line.erase(lastNotSpace + 1);

   // Delete leading spaces (a non-space character is guaranteed to exist here)
   line.erase(0, line.find_first_not_of(kWhiteSpace));
}
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// Handle the expression lines of the input file in order to pass the
93 /// elements to the members of the object.
94 ///
95 /// param[in] line - TTreeFormula expression, either read form the configuration
96 /// file or passed as expression to the constructor
97 
98 std::string TSimpleAnalysis::HandleExpressionConfig(const std::string& line)
99 {
100  static const std::string kCutIntr = " if ";
101 
102  std::size_t equal = line.find("=");
103  if (equal == std::string::npos)
104  return "Error: missing '='";
105 
106  // Set the histName value
107  std::string histName = line.substr(0, equal);
108  DeleteCommentsAndSpaces(histName);
109  if (histName.empty())
110  return "Error: no histName found";
111 
112  //Set the histExpression value
113  std::size_t cutPos = line.find(kCutIntr, equal);
114  std::string histExpression;
115  if (cutPos == std::string::npos)
116  histExpression = line.substr(equal + 1);
117  else
118  histExpression = line.substr(equal + 1, cutPos - equal - 1);
119  DeleteCommentsAndSpaces(histExpression);
120  if (histExpression.empty())
121  return "Error: no expression found";
122 
123  // Set the histCut value
124  std::string histCut;
125  if (cutPos != std::string::npos) {
126  histCut = line.substr(cutPos + kCutIntr.size());
127  DeleteCommentsAndSpaces(histCut);
128  if (histCut.empty())
129  return "Error: missing cut expression after 'if'";
130  }
131  else
132  histCut = "";
133 
134  // Set the map that contains the histName, histExpressions and histCut values
135  auto check = fHists.insert(std::make_pair((const std::string&)histName,
136  std::make_pair(histExpression, histCut)));
137 
138  // Check if there are histograms with the same name
139  if (!check.second)
140  return "Duplicate histogram name";
141  return "";
142 }
143 
144 ////////////////////////////////////////////////////////////////////////////////
145 /// Constructor for the case of command line parsing arguments. It sets the members
146 /// of the object.
147 ///
148 /// \param[in] output - name of the output file
149 /// \param[in] inputFiles - name of the input .root files
150 /// \param[in] expressions - what is shown in the histograms
151 /// \param[in] treeName - name of the tree
152 /// \throws std::runtime_error in case of ill-formed expressions
153 
155  const std::vector<std::string>& inputFiles,
156  const std::vector<std::string>& expressions,
157  const std::string& treeName = ""):
158  fInputFiles(inputFiles), fOutputFile(output), fTreeName(treeName)
159 {
160  for (const std::string& expr: expressions) {
161  std::string errMessage = HandleExpressionConfig(expr);
162  if (!errMessage.empty())
163  throw std::runtime_error(errMessage + " in " + expr);
164  }
165 }
166 
167 ////////////////////////////////////////////////////////////////////////////////
168 /// Extract the name of the tree from the first input file when the tree name
169 /// isn't in the configuration file. Returns the name of the tree.
170 
171 static std::string ExtractTreeName(std::string& firstInputFile)
172 {
173  std::string treeName = "";
174  std::unique_ptr<TFile> inputFile{TFile::Open(firstInputFile.c_str())};
175 
176  // Loop over all the keys inside the first input file
177  for (TObject* keyAsObj : *inputFile->GetListOfKeys()) {
178  TKey* key = static_cast<TKey*>(keyAsObj);
179  TClass* clObj = TClass::GetClass(key->GetClassName());
180  if (!clObj)
181  continue;
182  // If the key is releted to and object that inherits from TTree::Class we
183  // set treeName with the name of this key if treeName is empty, otherwise
184  // error occours
185  if (clObj->InheritsFrom(TTree::Class())) {
186  if (treeName.empty())
187  treeName = key->GetName();
188  else {
189  ::Error("TSimpleAnalysis::Analyze", "Multiple trees inside %s", firstInputFile.c_str());
190  return "";
191  }
192  }
193  }
194  // If treeName is yet empty, error occours
195  if (treeName.empty()) {
196  ::Error("TSimpleAnalysis::Analyze", "No tree inside %s", firstInputFile.c_str());
197  return "";
198  }
199  return treeName;
200 }
201 
202 ////////////////////////////////////////////////////////////////////////////////
203 /// Returns true if there are no errors in TChain::LoadTree()
204 
205 static bool CheckChainLoadResult(TChain* chain)
206 {
207  // Possible return values of TChain::LoadTree()
208  static const char* errors[] {
209  "all good", // 0
210  "empty chain", // -1
211  "invalid entry number", // -2
212  "cannot open the file", // -3
213  "missing tree", // -4
214  "internal error" // -5
215  };
216 
217  bool ret = true;
218  TObjArray *fileElements = chain->GetListOfFiles();
219  TIter next(fileElements);
220  while (TChainElement* chEl = (TChainElement*)next()) {
221  if (chEl->GetLoadResult() < 0) {
222  ::Error("TSimpleAnalysis::Run", "Load failure in file %s: %s",
223  chEl->GetTitle(), errors[-(chEl->GetLoadResult())]);
224  ret = false;
225  }
226  }
227  return ret;
228 }
229 
230 ////////////////////////////////////////////////////////////////////////////////
231 /// Disambiguate tree name from first input file and set up fTreeName if it is
232 /// empty
233 
235 {
236  // Disambiguate tree name from first input file:
237  // just try to open it, if that works it's an input file.
238  if (!fTreeName.empty()) {
239  // Silence possible error message from TFile constructor if this is a tree name.
240  int oldLevel = gErrorIgnoreLevel;
242  if (TFile* probe = TFile::Open(fTreeName.c_str())) {
243  if (!probe->IsZombie()) {
244  fInputFiles.insert(fInputFiles.begin(), fTreeName);
245  fTreeName.clear();
246  }
247  delete probe;
248  }
249  gErrorIgnoreLevel = oldLevel;
250  }
251  // If fTreeName is empty we try to find the name of the tree through reading
252  // of the first input file
253  if (fTreeName.empty())
255  if (fTreeName.empty()) // No tree name found
256  return false;
257  return true;
258 }
259 
260 ////////////////////////////////////////////////////////////////////////////////
261 /// Execute all the TChain::Draw() as configured and stores the output histograms.
262 /// Returns true if the analysis succeeds.
263 
265 {
266  if (!SetTreeName())
267  return false;
268 
269  // Create the output file and check if it fails
270  TFile ofile(fOutputFile.c_str(), "RECREATE");
271  if (ofile.IsZombie()) {
272  ::Error("TSimpleAnalysis::Run", "Impossible to create %s", fOutputFile.c_str());
273  return false;
274  }
275 
276  // Store the histograms into a vector
277  auto generateHisto = [&](const std::pair<TChain*, TDirectory*>& job) {
278  TChain* chain = job.first;
279  TDirectory* taskDir = job.second;
280  taskDir->cd();
281  std::vector<TH1F *> vPtrHisto(fHists.size());
282  // Index for a correct set up of vPtrHisto
283  int i = 0;
284 
285  // Loop over all the histograms
286  for (const auto &histo : fHists) {
287  const std::string& expr = histo.second.first;
288  const std::string& histoName = histo.first;
289  const std::string& cut = histo.second.second;
290 
291  chain->Draw((expr + ">>" + histoName).c_str(), cut.c_str(), "goff");
292  TH1F *ptrHisto = (TH1F*)taskDir->Get(histoName.c_str());
293 
294  // Check if there are errors inside the chain
295  if (!CheckChainLoadResult(chain))
296  return std::vector<TH1F *>();
297 
298  vPtrHisto[i] = ptrHisto;
299  ++i;
300  }
301  return vPtrHisto;
302  };
303 
304 #if 0
305  // The MT version is currently disabled because reading emulated objects
306  // triggers a lock for every object read. This in turn increases the run
307  // time way beyond the serial case.
308 
309 
311  ROOT::TThreadExecutor pool(8);
312 
313  // Do the chain of the fInputFiles
314  std::vector<std::pair<TChain*, TDirectory*>> vChains;
315  for (size_t i = 0; i < fInputFiles.size(); ++i){
316  const std::string& inputfile = fInputFiles[i];
317  TChain *ch;
318  ch = new TChain(fTreeName.c_str());
319  ch->Add(inputfile.c_str());
320 
321  // Create task-specific TDirectory, so avoid parallel tasks to interfere
322  // in gDirectory with histogram registration.
323  TDirectory* taskDir = gROOT->mkdir(TString::Format("TSimpleAnalysis_taskDir_%d", (int)i));
324 
325  vChains.emplace_back(std::make_pair(ch, taskDir));
326  }
327 
328  auto vFileswHists = pool.Map(generateHisto, vChains);
329 
330  // If a file does not exist, one of the vFileswHists
331  // will be a vector of length 0. Detect that.
332  for (auto&& histsOfJob: vFileswHists) {
333  if (histsOfJob.empty())
334  return false;
335  }
336 
337  // Merge the results. Initialize the result with the first task's results,
338  // then add the other tasks.
339  std::vector<TH1F *> vPtrHisto{vFileswHists[0]};
340  ofile.cd();
341  for (unsigned j = 0; j < fHists.size(); j++) {
342  for (unsigned i = 1; i < vFileswHists.size(); i++) {
343  if (!vFileswHists[i][j]) {
344  // ignore that sum histogram:
345  delete vPtrHisto[j];
346  vPtrHisto[j] = nullptr;
347  continue;
348  }
349  if (vPtrHisto[j])
350  vPtrHisto[j]->Add(vFileswHists[i][j]);
351  }
352  if (vPtrHisto[j])
353  vPtrHisto[j]->Write();
354  }
355  return true;
356 
357 #else
358 
359  // Do the chain of the fInputFiles
360  TChain* chain = new TChain(fTreeName.c_str());
361  for (const std::string& inputfile: fInputFiles)
362  chain->Add(inputfile.c_str());
363 
364  // Generate histograms
365  auto vHisto = generateHisto({chain, gDirectory});
366  if (vHisto.empty())
367  return false;
368  ofile.cd();
369  // Store the histograms
370  for (auto histo: vHisto) {
371  if (histo)
372  histo->Write();
373  }
374  return true;
375 
376 #endif
377 }
378 
379 ////////////////////////////////////////////////////////////////////////////////
380 /// Returns false if the line contains '=' (it is an expression); otherwise stores it as an input file name and returns true.
381 ///
382 /// param[in] line - line read from the input file
383 
385 {
386  if (line.find("=") == std::string::npos) {
387  fInputFiles.push_back(line);
388  return true;
389  }
390  return false; // It's an expression
391 }
392 
393 ////////////////////////////////////////////////////////////////////////////////
394 /// Skip subsequent empty lines read from fIn and returns the next not empty line.
395 ///
396 /// param[in] numbLine - number of the input file line
397 
398 std::string TSimpleAnalysis::GetLine(int& numbLine)
399 {
400  std::string notEmptyLine;
401 
402  do {
403  getline(fIn, notEmptyLine);
404  DeleteCommentsAndSpaces(notEmptyLine);
405  numbLine++;
406  } while (fIn && notEmptyLine.empty());
407 
408  return notEmptyLine;
409 }
410 
411 ////////////////////////////////////////////////////////////////////////////////
412 /// This function has the aim of setting the arguments read from the input file.
413 
415 {
416  int readingSection = kReadingOutput;
417  std::string line;
418  int numbLine = 0;
419 
420  // Error if the input file does not exist
421  fIn.open(fConfigFile);
422  if (!fIn) {
423  ::Error("TSimpleAnalysis", "File %s not found", fConfigFile.c_str());
424  return false;
425  }
426 
427  while (!fIn.eof()) {
428  line = GetLine(numbLine);
429  if (line.empty()) // It can happen if fIn.eof()
430  continue;
431  std::string errMessage;
432 
433  switch (readingSection) {
434 
435  // Set the name of the output file
436  case kReadingOutput:
437  fOutputFile = line;
438  readingSection++;
439  break;
440 
441  // Set the name of the tree
442  case kReadingTreeName:
443  fTreeName = line;
444  readingSection++;
445  break;
446 
447  // Set the input files
448  case kReadingInput:
449  if (!HandleInputFileNameConfig(line)) {
450  // Not an input file name; try to parse as an expression
451  errMessage = HandleExpressionConfig(line);
452  readingSection = kReadingExpressions;
453  }
454  break;
455 
456  // Set the expressions
457  case kReadingExpressions:
458  errMessage = HandleExpressionConfig(line);
459  break;
460  }
461 
462  // Report any errors if occour during the configuration proceedings
463  if (!errMessage.empty()) {
464  ::Error("TSimpleAnalysis::Configure", "%s in %s:%d", errMessage.c_str(),
465  fConfigFile.c_str(), numbLine);
466  return false;
467  }
468  } // while (!fIn.eof())
469  return true;
470 }
471 
472 ////////////////////////////////////////////////////////////////////////////////
473 /// Function that allows to create the TSimpleAnalysis object and execute its
474 /// Configure and Analyze functions.
475 ///
476 /// param[in] configurationFile - name of the input file used to create the TSimpleAnalysis object
477 
478 bool RunSimpleAnalysis (const char* configurationFile) {
479  TSimpleAnalysis obj(configurationFile);
480  if (!obj.Configure())
481  return false;
482  if (!obj.Run())
483  return false;
484  return true; // Return true only if Configure() and Run() functions were performed correctly
485 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
std::string fConfigFile
Name of the configuration file.
std::map< std::string, std::pair< std::string, std::string > > fHists
An array of TObjects.
Definition: TObjArray.h:39
bool Run()
Execute all the TChain::Draw() as configured and stores the output histograms.
R__EXTERN Int_t gErrorIgnoreLevel
Definition: TError.h:107
A TSimpleAnalysis object creates histograms from a TChain.
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
Definition: TDirectory.cxx:729
TLine * line
std::string HandleExpressionConfig(const std::string &line)
Handle the expression lines of the input file in order to pass the elements to the members of the obj...
static const std::string comment("comment")
bool SetTreeName()
Disambiguate tree name from first input file and set up fTreeName if it is empty. ...
bool RunSimpleAnalysis(const char *configurationFile)
Function that allows to create the TSimpleAnalysis object and execute its Configure and Analyze funct...
bool equal(double d1, double d2, double stol=10000)
virtual const char * GetClassName() const
Definition: TKey.h:77
static bool CheckChainLoadResult(TChain *chain)
Returns true if there are no errors in TChain::LoadTree()
bool Configure()
This function has the aim of setting the arguments read from the input file.
Reading the name of the .root input files.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
#define gROOT
Definition: TROOT.h:364
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:575
const char * Class
Definition: TXMLSetup.cxx:64
A TChainElement describes a component of a TChain.
Definition: TChainElement.h:30
std::string GetLine(int &numbLine)
Skip subsequent empty lines read from fIn and returns the next not empty line.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3907
TObjArray * GetListOfFiles() const
Definition: TChain.h:109
virtual Long64_t Draw(const char *varexp, const TCut &selection, Option_t *option="", Long64_t nentries=kMaxEntries, Long64_t firstentry=0)
Draw expression varexp for selected entries.
Definition: TChain.cxx:758
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2335
const Int_t kFatal
Definition: TError.h:44
std::string fTreeName
Name of the input tree.
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:30
void Error(const char *location, const char *msgfmt,...)
std::ifstream fIn
Stream for the input file.
The ROOT global object gROOT contains a list of all defined classes.
Definition: TClass.h:81
Bool_t InheritsFrom(const char *cl) const
Return kTRUE if this class inherits from a class with name "classname".
Definition: TClass.cxx:4610
Reading the name of the tree.
bool HandleInputFileNameConfig(const std::string &line)
Returns false if the line contains '=' (it is an expression); otherwise stores it as an input file name and returns true.
void EnableThreadSafety()
Enables the global mutex to make ROOT thread safe/aware.
Definition: TROOT.cxx:498
static std::string ExtractTreeName(std::string &firstInputFile)
Extract the name of the tree from the first input file when the tree name isn't in the configuration ...
Describe directory structure in memory.
Definition: TDirectory.h:44
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2893
std::vector< std::string > fInputFiles
.root input files
Mother of all ROOT objects.
Definition: TObject.h:37
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:435
A chain is a collection of files containg TTree objects.
Definition: TChain.h:35
static void DeleteCommentsAndSpaces(std::string &line)
Delete comments, leading and trailing white spaces in a string.
Reading the name of the output file.
TSimpleAnalysis(const std::string &file)
#define gDirectory
Definition: TDirectory.h:221
std::string fOutputFile
Output file in which are stored the histograms.
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:214