ROOT  6.06/09
Reference Guide
TFileCollection.cxx
Go to the documentation of this file.
1 // @(#)root/base:$Id$
2 // Author: Gerhard Erich Bruckner, Jan Fiete Grosse-Oetringhaus 04/06/07
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class TFileCollection
13 
14 Class that contains a list of TFileInfo's and accumulated meta
15 data information about its entries. This class is used to describe
16 file sets as stored by Grid file catalogs, by PROOF or any other
17 collection of TFile names.
18 */
19 
20 #include "TFileCollection.h"
21 #include "THashList.h"
22 #include "TFileInfo.h"
23 #include "TIterator.h"
24 #include "TMap.h"
25 #include "TObjString.h"
26 #include "TUri.h"
27 #include "TUrl.h"
28 #include "TSystem.h"
29 #include "Riostream.h"
30 #include "TRegexp.h"
31 #include "TPRegexp.h"
32 #include "TError.h"
33 
34 
36 
37 ////////////////////////////////////////////////////////////////////////////////
38 /// TFileCollection constructor. Specify a name and title describing
39 /// the list. If textfile is specified the file is opened and a
40 /// TFileCollection is created containing the files in the textfile.
41 
42 TFileCollection::TFileCollection(const char *name, const char *title,
43  const char *textfile, Int_t nfiles, Int_t firstfile)
44  : TNamed(name, title), fList(0), fMetaDataList(0), fDefaultTree(),
45  fTotalSize(0), fNFiles(0), fNStagedFiles(0), fNCorruptFiles(0)
46 {
47  fList = new THashList();
48  fList->SetOwner();
49 
50  fMetaDataList = new TList;
51  fMetaDataList->SetOwner();
52 
53  AddFromFile(textfile, nfiles, firstfile);
54 }
55 
56 ////////////////////////////////////////////////////////////////////////////////
57 /// Cleanup.
58 
60 {
61  delete fList;
62  delete fMetaDataList;
63 }
64 
65 ////////////////////////////////////////////////////////////////////////////////
66 /// Add TFileInfo to the collection.
67 
69 {
70  if (fList && info) {
71  if (!fList->FindObject(info->GetName())) {
72  fList->Add(info);
73  if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
74  return 1;
75  } else {
76  Warning("Add", "file: '%s' already in the list - ignoring",
77  info->GetCurrentUrl()->GetUrl());
78  }
79  }
80  return 0;
81 }
82 
83 ////////////////////////////////////////////////////////////////////////////////
84 /// Add content of the TFileCollection to this collection.
85 
87 {
88  if (fList && coll && coll->GetList()) {
89  TIter nxfi(coll->GetList());
90  TFileInfo *fi = 0;
91  while ((fi = (TFileInfo *) nxfi())) {
92  TFileInfo *info = new TFileInfo(*fi);
93  fList->Add(info);
94  if (fi->GetIndex() < 0) info->SetIndex(fList->GetSize());
95  }
96  return 1;
97  } else {
98  return 0;
99  }
100 }
101 
102 ////////////////////////////////////////////////////////////////////////////////
103 /// Add file names contained in the specified text file.
104 /// The file should contain one url per line; empty lines or lines starting with '#'
105 /// (commented lines) are ignored.
106 /// If nfiles > 0 only nfiles files are added, starting from file 'firstfile' (>= 1).
107 /// The method returns the number of added files.
108 
109 Int_t TFileCollection::AddFromFile(const char *textfile, Int_t nfiles, Int_t firstfile)
110 {
111  if (!fList)
112  return 0;
113 
114  Int_t nf = 0;
115  TString fn(textfile);
116  if (!fn.IsNull() && !gSystem->ExpandPathName(fn)) {
117  std::ifstream f;
118  f.open(fn);
119  if (f.is_open()) {
120  Bool_t all = (nfiles <= 0) ? kTRUE : kFALSE;
121  Int_t ff = (!all && (firstfile < 1)) ? 1 : firstfile;
122  Int_t nn = 0;
123  while (f.good() && (all || nf < nfiles)) {
124  TString line;
125  line.ReadToDelim(f);
126  // Skip commented or empty lines
127  if (!line.IsWhitespace() && !line.BeginsWith("#")) {
128  nn++;
129  if (all || nn >= ff) {
130  TFileInfo *info = new TFileInfo(line);
131  fList->Add(info);
132  if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
133  nf++;
134  }
135  }
136  }
137  f.close();
138  Update();
139  } else
140  Error("AddFromFile", "unable to open file %s (%s)", textfile, fn.Data());
141  }
142  return nf;
143 }
144 
145 ////////////////////////////////////////////////////////////////////////////////
146 /// Add all files matching the specified pattern to the collection.
147 /// 'dir' can include wildcards after the last slash, which causes all
148 /// matching files in that directory to be added.
149 /// If dir is the full path of a file, only one element is added.
150 /// Return value is the number of added files.
151 
152 Int_t TFileCollection::Add(const char *dir)
153 {
154  Int_t nf = 0;
155 
156  if (!fList)
157  return nf;
158 
159  if (!dir || !*dir) {
160  Error("Add", "input dir undefined");
161  return nf;
162  }
163 
164  FileStat_t st;
165  FileStat_t tmp;
166  TString baseDir = gSystem->DirName(dir);
167  // if the 'dir' or its base dir exist
168  if (gSystem->GetPathInfo(dir, st) == 0 ||
169  gSystem->GetPathInfo(baseDir, tmp) == 0) {
170  // If 'dir' points to a single file, add to the list and exit
171  if (R_ISREG(st.fMode)) {
172  // regular, single file
173  TFileInfo *info = new TFileInfo(dir);
174  info->SetBit(TFileInfo::kStaged);
175  Add(info);
176  nf++;
177  Update();
178  return nf;
179  } else {
180  void *dataSetDir = gSystem->OpenDirectory(gSystem->DirName(dir));
181  if (!dataSetDir) {
182  // directory cannot be opened
183  Error("Add", "directory %s cannot be opened",
184  gSystem->DirName(dir));
185  } else {
186  const char *ent;
187  TString filesExp(TString("^") + gSystem->BaseName(dir) + "$");
188  filesExp.ReplaceAll("*",".*");
189  TRegexp rg(filesExp);
190  while ((ent = gSystem->GetDirEntry(dataSetDir))) {
191  TString entryString(ent);
192  if (entryString.Index(rg) != kNPOS) {
193  // matching dir entry
194  TString fn = gSystem->DirName(dir);
195  fn += "/";
196  fn += ent;
197  gSystem->GetPathInfo(fn, st);
198  if (R_ISREG(st.fMode)) {
199  // regular file
200  TFileInfo *info = new TFileInfo(fn);
201  info->SetBit(TFileInfo::kStaged);
202  Add(info);
203  nf++;
204  }
205  }
206  }
207  // close the directory
208  gSystem->FreeDirectory(dataSetDir);
209  Update();
210  }
211  }
212  }
213  return nf;
214 }
215 
216 ////////////////////////////////////////////////////////////////////////////////
217 /// Remove duplicates based on the UUID, typically after a verification.
218 /// Return the number of entries removed.
219 
221 {
222  THashList *hl = new THashList;
223  hl->SetOwner();
224 
225  Int_t n0 = fList->GetSize();
226  TIter nxfi(fList);
227  TFileInfo *fi = 0;
228  while ((fi = (TFileInfo *)nxfi())) {
229  if (!(hl->FindObject(fi->GetUUID()->AsString()))) {
230  // We hash on the UUID
231  fList->Remove(fi);
232  fi->SetName(fi->GetUUID()->AsString());
233  hl->Add(fi);
234  }
235  }
236  delete fList;
237  fList = hl;
238  // How many removed?
239  Int_t nr = n0 - fList->GetSize();
240  if (nr > 0)
241  Info("RemoveDuplicates", "%d duplicates found and removed", nr);
242  // Done
243  return nr;
244 }
245 
246 ////////////////////////////////////////////////////////////////////////////////
247 /// Creates a subset containing the files that have the kStaged bit set and the kCorrupted bit not set.
248 
250 {
251  if (!fList)
252  return 0;
253 
254  TFileCollection *subset = new TFileCollection(GetName(), GetTitle());
255 
256  TIter iter(fList);
257  TFileInfo *fileInfo = 0;
258  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
259  if (fileInfo->TestBit(TFileInfo::kStaged) && !fileInfo->TestBit(TFileInfo::kCorrupted))
260  subset->Add(fileInfo);
261  }
262 
263  subset->Update();
264 
265  return subset;
266 }
267 
268 ////////////////////////////////////////////////////////////////////////////////
269 /// Merge all TFileCollection objects in li into this TFileCollection object.
270 /// Updates counters at the end.
271 /// Returns the number of merged collections or -1 in case of error.
272 
274 {
275 
276  if (!li) return 0;
277  if (li->IsEmpty()) return 0;
278 
279  Long64_t nentries=0;
280  TIter next(li);
281  while (TObject *o = next()) {
282  TFileCollection* coll = dynamic_cast<TFileCollection*> (o);
283  if (!coll) {
284  Error("Add", "attempt to add object of class: %s to a %s",
285  o->ClassName(),this->ClassName());
286  return -1;
287  }
288  Add(coll);
289  nentries++;
290  }
291  Update();
292 
293  return nentries;
294 }
295 
296 ////////////////////////////////////////////////////////////////////////////////
297 /// Update accumulated information about the elements of the collection
298 /// (e.g. fTotalSize). If 'avgsize' > 0, use an average file size of 'avgsize'
299 /// bytes when the size info is not available.
300 /// Also updates the meta data information by summarizing
301 /// the meta data of the contained objects.
302 /// Return -1 in case of any failure, 0 if the total size is exact, 1 if
303 /// incomplete, 2 if complete but (at least partially) estimated.
304 
306 {
307  if (!fList)
308  return -1;
309 
310  Int_t rc = 0;
311 
312  fTotalSize = 0;
313  fNStagedFiles = 0;
314  fNCorruptFiles = 0;
315 
316  // Clear internal meta information which is going to be rebuilt in this
317  // function
318  TIter nxm(fMetaDataList);
319  TFileInfoMeta *m = 0;
320  while ((m = (TFileInfoMeta *)nxm())) {
321  if (!(m->TestBit(TFileInfoMeta::kExternal))) {
322  fMetaDataList->Remove(m);
323  delete m;
324  }
325  }
326 
327  fNFiles = fList->GetEntries();
328 
329  TIter iter(fList);
330  TFileInfo *fileInfo = 0;
331  while ((fileInfo = dynamic_cast<TFileInfo*> (iter.Next()))) {
332 
333  if (fileInfo->GetSize() > 0) {
334  fTotalSize += fileInfo->GetSize();
335  } else {
336  rc = 1;
337  if (avgsize > 0) {
338  rc = 2;
339  fTotalSize += avgsize;
340  }
341  }
342 
343  if (fileInfo->TestBit(TFileInfo::kStaged) && !fileInfo->TestBit(TFileInfo::kCorrupted)) {
344  fNStagedFiles++;
345 
346  if (fileInfo->GetMetaDataList()) {
347  TIter metaDataIter(fileInfo->GetMetaDataList());
348  // other than TFileInfoMeta is also allowed in list
349  TObject *obj = 0;
350  while ((obj = metaDataIter.Next())) {
351  TFileInfoMeta *metaData = dynamic_cast<TFileInfoMeta*>(obj);
352  if (!metaData)
353  continue;
354  if (!metaData->IsTree())
355  continue;
356 
357  // find corresponding entry in TFileCollection's meta data
358  TFileInfoMeta *metaDataSum = dynamic_cast<TFileInfoMeta*>(fMetaDataList->FindObject(metaData->GetName()));
359  Bool_t newObj = kFALSE;
360  if (!metaDataSum) {
361  // create explicitly, there are some values that do not make sense for the sum
362  metaDataSum = new TFileInfoMeta(metaData->GetName(), metaData->GetTitle());
363  fMetaDataList->Add(metaDataSum);
364  newObj = kTRUE;
365  }
366 
367  // sum the values
368  if (newObj)
369  metaDataSum->SetEntries(metaData->GetEntries());
370  else
371  metaDataSum->SetEntries(metaDataSum->GetEntries() + metaData->GetEntries());
372  }
373  }
374  }
375  if (fileInfo->TestBit(TFileInfo::kCorrupted))
376  fNCorruptFiles++;
377  }
378 
379  // Done
380  return rc;
381 }
382 
383 ////////////////////////////////////////////////////////////////////////////////
384 /// Prints the contents of the TFileCollection.
385 /// If option contains:
386 ///
387 /// - 'M' print global meta information
388 /// - 'F' print all the files in the collection in compact form
389 /// (current url, default tree name|class|entries, md5)
390 /// - 'L' together with 'F', print all the files in the collection
391 /// in long form (uuid, md5, all URLs, all meta objects; on
392 /// many lines)
393 /// - "filter:[SsCc]" invokes PrintDetailed() which prints out dataset
394 /// content in a formatted fashion by filtering on files
395 /// which are (S)taged or not (s), (C)orrupted or not (c)
396 
397 void TFileCollection::Print(Option_t *option) const
398 {
399  TString opt(option);
400  TPMERegexp re("(^|;| )filter:([SsCc]+)( |;|$)", 4);
401  if (re.Match(option) == 4) {
402  TString showOnly = re[2];
403  PrintDetailed(showOnly);
404  return;
405  }
406 
407  Printf("TFileCollection %s - %s contains: %lld files with a size of"
408  " %lld bytes, %.1f %% staged - default tree name: '%s'",
411 
412  if (opt.Contains("M", TString::kIgnoreCase)) {
413  Printf("The files contain the following trees:");
414 
415  TIter metaDataIter(fMetaDataList);
416  TFileInfoMeta* metaData = 0;
417  while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
418  if (!metaData->IsTree())
419  continue;
420 
421  Printf("Tree %s: %lld events", metaData->GetName(), metaData->GetEntries());
422  }
423  }
424 
425  if (fList && opt.Contains("F", TString::kIgnoreCase)) {
426  Printf("The collection contains the following files:");
427  if (!opt.Contains("L") && !fDefaultTree.IsNull())
428  opt += TString::Format(" T:%s", fDefaultTree.Data());
429  fList->Print(opt);
430  }
431 }
432 
433 ////////////////////////////////////////////////////////////////////////////////
434 /// Print detailed.
435 
437 {
438  Bool_t bS, bs, bC, bc;
439  bS = bs = bC = bc = kFALSE;
440 
441  if (showOnly.Index('S') >= 0) bS = kTRUE;
442  if (showOnly.Index('s') >= 0) bs = kTRUE;
443  if (showOnly.Index('C') >= 0) bC = kTRUE;
444  if (showOnly.Index('c') >= 0) bc = kTRUE;
445 
446  // If Ss (or Cc) omitted, show both Ss (or Cc)
447  if (!bc && !bC) bc = bC = kTRUE;
448  if (!bs && !bS) bs = bS = kTRUE;
449 
450  TIter it(fList);
451  TFileInfo *info;
452  UInt_t countAll = 0;
453  UInt_t countMatch = 0;
454 
455  Printf("\033[1m #. SC | Entries | Size | URL\033[m");
456 
457  TString um;
458  Double_t sz;
459 
460  while ((info = dynamic_cast<TFileInfo *>(it.Next()))) {
461 
462  Bool_t s = info->TestBit(TFileInfo::kStaged);
464 
465  TUrl *url;
466 
467  countAll++;
468 
469  if ( ((s && bS) || (!s && bs)) && ((c && bC) || (!c && bc)) ) {
470 
471  TFileInfoMeta *meta = info->GetMetaData(); // gets the first one
472  Int_t entries = -1;
473 
474  if (meta) entries = meta->GetEntries();
475 
476  FormatSize(info->GetSize(), um, sz);
477 
478  // First line: current URL with all information
479  info->ResetUrl();
480  TUrl *curUrl = info->GetCurrentUrl();
481  const char *curUrlStr = curUrl ? curUrl->GetUrl() : "n.a.";
482  Printf("\033[1m%4u.\033[m %c%c | %-7s | %6.1lf %s | %s",
483  ++countMatch,
484  (s ? 'S' : 's'), (c ? 'C' : 'c'),
485  ((entries > 0) ? Form("% 7d", entries) : "n.a."),
486  sz, um.Data(), curUrlStr);
487  info->NextUrl();
488 
489  // Every other URL shown below current one
490  while ((url = info->NextUrl())) {
491  Printf(" | | | %s", url->GetUrl());
492  }
493  info->ResetUrl();
494 
495  } // end match filters
496 
497  } // end loop over entries
498 
499  if (countAll) {
500 
501  Printf(">> There are \033[1m%u\033[m file(s) in dataset: "
502  "\033[1m%u (%5.1f%%)\033[m matched your criteria (%s)",
503  countAll, countMatch,
504  100.*(Float_t)countMatch/(Float_t)countAll, showOnly.Data());
505 
506  FormatSize(fTotalSize, um, sz);
507  Printf(">> Total size : \033[1m%.1f %s\033[m", sz, um.Data());
508  Printf(">> Staged (S) : \033[1m%5.1f %%\033[m", GetStagedPercentage());
509  Printf(">> Corrupted (C) : \033[1m%5.1f %%\033[m",
511 
512  }
513  else {
514  Printf(">> No files in dataset");
515  }
516 
517  const char *treeName = GetDefaultTreeName();
518  Printf(">> Default tree : \033[1m%s\033[m",
519  (treeName ? treeName : "(no default tree)"));
520 
521 }
522 
523 ////////////////////////////////////////////////////////////////////////////////
524 /// Format size.
525 
527  Double_t &size) const
528 {
529  static const char *ums[] = { "byt", "KiB", "MiB", "GiB", "TiB" };
530  Int_t maxDiv = sizeof(ums)/sizeof(const char *);
531  Int_t nDiv = 0;
532  Double_t b = bytes;
533 
534  while ((b >= 1024.) && (nDiv+1 < maxDiv)) {
535  b /= 1024.;
536  nDiv++;
537  }
538 
539  um = ums[nDiv];
540  size = b;
541 }
542 
543 ////////////////////////////////////////////////////////////////////////////////
544 /// Calls TUrl::SetAnchor() for all URLs contained in all TFileInfos.
545 
546 void TFileCollection::SetAnchor(const char *anchor)
547 {
548  if (!fList)
549  return;
550 
551  TIter iter(fList);
552  TFileInfo *fileInfo = 0;
553  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
554  fileInfo->ResetUrl();
555  TUrl *url = 0;
556  while ((url = fileInfo->NextUrl()))
557  url->SetAnchor(anchor);
558  fileInfo->ResetUrl();
559  }
560 }
561 
562 ////////////////////////////////////////////////////////////////////////////////
563 /// Set the bit for all TFileInfos
564 
566 {
567  if (!fList)
568  return;
569 
570  TIter iter(fList);
571  TFileInfo *fileInfo = 0;
572  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
573  fileInfo->SetBit(f);
574 }
575 
576 ////////////////////////////////////////////////////////////////////////////////
577 /// Reset the bit for all TFileInfos
578 
580 {
581  if (!fList)
582  return;
583 
584  TIter iter(fList);
585  TFileInfo *fileInfo = 0;
586  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
587  fileInfo->ResetBit(f);
588 }
589 
590 ////////////////////////////////////////////////////////////////////////////////
591 /// Returns the tree name set with SetDefaultTreeName, if one was set.
592 /// Otherwise returns the name of the first tree in the meta data list.
593 /// Returns 0 in case no trees are found in the meta data list.
594 
596 {
597  if (fDefaultTree.Length() > 0)
598  return fDefaultTree;
599 
600  TIter metaDataIter(fMetaDataList);
601  TFileInfoMeta *metaData = 0;
602  while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
603  if (!metaData->IsTree())
604  continue;
605  return metaData->GetName();
606  }
607  return 0;
608 }
609 
610 ////////////////////////////////////////////////////////////////////////////////
611 /// Returns the number of entries for the specified tree (retrieved from meta data).
612 /// If tree is not specified, use the default tree name.
613 /// Returns -1 in case the specified tree is not found.
614 
616 {
617  if (!tree || !*tree) {
618  tree = GetDefaultTreeName();
619  if (!tree)
620  return -1;
621  }
622 
623  TFileInfoMeta *metaData = dynamic_cast<TFileInfoMeta*>(fMetaDataList->FindObject(tree));
624  if (!metaData)
625  return -1;
626 
627  return metaData->GetEntries();
628 }
629 
630 ////////////////////////////////////////////////////////////////////////////////
631 /// Returns the meta data object with the specified meta name.
632 /// Returns 0 in case specified meta data is not found.
633 
635 {
636  if (!meta || !*meta)
637  return 0;
638 
639  return dynamic_cast<TFileInfoMeta*>(fMetaDataList->FindObject(meta));
640 }
641 
642 ////////////////////////////////////////////////////////////////////////////////
643 /// Moves the indicated meta data in the first position, so that
644 /// it becomes effectively the default.
645 
647 {
648  TFileInfoMeta *fim = GetMetaData(meta);
649  if (fim) {
650  fMetaDataList->Remove(fim);
651  fMetaDataList->AddFirst(fim);
652  }
653 }
654 
655 ////////////////////////////////////////////////////////////////////////////////
656 /// Removes the indicated meta data object in all TFileInfos and this object
657 /// If no name is given all metadata is removed
658 
659 void TFileCollection::RemoveMetaData(const char *meta)
660 {
661  if (fList) {
662  TIter iter(fList);
663  TFileInfo *fileInfo = 0;
664  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
665  fileInfo->RemoveMetaData(meta);
666  }
667 
668  if (meta) {
669  TObject* obj = fMetaDataList->FindObject("meta");
670  if (obj) {
671  fMetaDataList->Remove(obj);
672  delete obj;
673  }
674  } else
675  fMetaDataList->Clear();
676 }
677 
678 ////////////////////////////////////////////////////////////////////////////////
679 /// Sort the collection.
680 
682 {
683  if (!fList)
684  return;
685 
686  // Make sure the relevant bit has the wanted value
687  if (useindex) {
689  } else {
691  }
692 
693  fList->Sort();
694 }
695 
696 ////////////////////////////////////////////////////////////////////////////////
697 /// Export the relevant info as a string; use 'name' as collection name,
698 /// if defined, else use GetName().
699 /// The output object must be destroyed by the caller
700 
702 {
703  TString treeInfo;
704  if (GetDefaultTreeName()) {
706  if (popt == 1) {
707  treeInfo = GetDefaultTreeName();
708  if (meta)
709  treeInfo += TString::Format(", %lld entries", meta->GetEntries());
710  TFileInfoMeta *frac = GetMetaData("/FractionOfTotal");
711  if (frac)
712  treeInfo += TString::Format(", %3.1f %% of total", frac->GetEntries() / 10.);
713  } else {
714  treeInfo.Form(" %s ", GetDefaultTreeName());
715  if (treeInfo.Length() > 14) treeInfo.Replace(13, 1, '>');
716  treeInfo.Resize(14);
717  if (meta) {
718  if (meta->GetEntries() > 99999999) {
719  treeInfo += TString::Format("| %8lld ", meta->GetEntries());
720  } else {
721  treeInfo += TString::Format("| %8.4g ", (Double_t) meta->GetEntries());
722  }
723  }
724  }
725  } else {
726  treeInfo = " N/A";
727  }
728  if (popt == 0) treeInfo.Resize(25);
729 
730  // Renormalize the size to kB, MB or GB
731  const char *unit[4] = {"kB", "MB", "GB", "TB"};
732  Int_t k = 0;
733  Long64_t refsz = 1024;
734  Long64_t xsz = (Long64_t) (GetTotalSize() / refsz);
735  while (xsz > 1024 && k < 3) {
736  k++;
737  refsz *= 1024;
738  xsz = (Long64_t) (GetTotalSize() / refsz);
739  }
740 
741  // The name
742  TString dsname(name);
743  if (dsname.IsNull()) dsname = GetName();
744 
745  // Create the output string
746  TObjString *outs = 0;
747  if (popt == 1) {
748  outs = new TObjString(Form("%s %lld files, %lld %s, staged %d %%, tree: %s", dsname.Data(),
749  GetNFiles(), xsz, unit[k],
750  (Int_t)GetStagedPercentage(), treeInfo.Data()));
751  } else {
752  outs = new TObjString(Form("%s| %7lld |%s| %5lld %s | %3d %%", dsname.Data(),
753  GetNFiles(), treeInfo.Data(), xsz, unit[k],
755  }
756  // Done
757  return outs;
758 }
759 
760 ////////////////////////////////////////////////////////////////////////////////
761 /// Return the subset of files served by 'server'. The syntax for 'server' is
762 /// the standard URI one, i.e. `[<scheme>://]<host>[:port]`
763 
765 {
767 
768  // Server specification is mandatory
769  if (!server || strlen(server) <= 0) {
770  Info("GetFilesOnServer", "server undefined - do nothing");
771  return fc;
772  }
773 
774  // Nothing to do for empty lists
775  if (!fList || fList->GetSize() <= 0) {
776  Info("GetFilesOnServer", "the list is empty - do nothing");
777  return fc;
778  }
779 
780  // Define the server reference string
781  TUri uri(server);
782  TString srv, scheme("root"), port;
783  if (uri.GetScheme() != "") scheme = uri.GetScheme();
784  if (uri.GetPort() != "") port.Form(":%s", uri.GetPort().Data());
785  srv.Form("%s://%s%s", scheme.Data(), TUrl(server).GetHostFQDN(), port.Data());
786  if (gDebug > 0)
787  Info("GetFilesOnServer", "searching for files on server: '%s' (input: '%s')",
788  srv.Data(), server);
789 
790  // Prepare the output
791  fc = new TFileCollection(GetName());
792  TString title;
793  if (GetTitle() && strlen(GetTitle()) > 0) {
794  title.Form("%s (subset on server %s)", GetTitle(), srv.Data());
795  } else {
796  title.Form("subset of '%s' on server %s", GetName(), srv.Data());
797  }
798  fc->SetTitle(title.Data());
799  // The default tree name
800  fc->SetDefaultTreeName(GetDefaultTreeName());
801 
802  // We look for URL starting with srv
803  srv.Insert(0, "^");
804 
805  // Go through the list
806  TIter nxf(fList);
807  TFileInfo *fi = 0;
808  while ((fi = (TFileInfo *)nxf())) {
809  TUrl *xu = 0;
810  if ((xu = fi->FindByUrl(srv.Data()))) {
811  // Create a new TFileInfo object
812  TFileInfo *nfi = new TFileInfo(xu->GetUrl(), fi->GetSize(),
813  fi->GetUUID() ? fi->GetUUID()->AsString() : 0,
814  fi->GetMD5() ? fi->GetMD5()->AsString() : 0);
815  if (fi->GetMetaDataList()) {
816  TIter nxm(fi->GetMetaDataList());
817  TFileInfoMeta *md = 0;
818  while ((md = (TFileInfoMeta *) nxm())) {
819  nfi->AddMetaData(new TFileInfoMeta(*md));
820  }
821  }
824  if (gDebug > 1)
825  Info("GetFilesOnServer", "adding: %s", xu->GetUrl());
826  fc->Add(nfi);
827  }
828  }
829 
830  // If nothing found, delete the object
831  if (fc->GetList()->GetSize() <= 0) {
832  delete fc;
833  fc = 0;
834  Info("GetFilesOnServer", "dataset '%s' has no files on server: '%s' (searched for: '%s')",
835  GetName(), server, srv.Data());
836  }
837 
838  // Fill up sums on the sub file collection
839  if (fc) {
840  fc->Update();
841  // Fraction of total in permille
842  Long64_t xf = (fc->GetTotalSize() * 1000) / GetTotalSize();
843  TFileInfoMeta *m = new TFileInfoMeta("FractionOfTotal", "External Info", xf);
845  fc->AddMetaData(m);
846  }
847 
848  // Done
849  return fc;
850 }
851 
852 ////////////////////////////////////////////////////////////////////////////////
853 /// Return a map of TFileCollections with the files on each data server,
854 /// excluding servers in the comma-separated list 'exclude'.
855 /// If curronly is kTRUE, only the URL flagged as current in each TFileInfo
856 /// is considered.
857 
858 TMap *TFileCollection::GetFilesPerServer(const char *exclude, Bool_t curronly)
859 {
860  TMap *dsmap = 0;
861 
862  // Nothing to do for empty lists
863  if (!fList || fList->GetSize() <= 0) {
864  Info("GetFilesPerServer", "the list is empty - do nothing");
865  return dsmap;
866  }
867 
868  // List of servers to be ignored
869  THashList *excl = 0;
870  if (exclude && strlen(exclude) > 0) {
871  excl = new THashList;
872  excl->SetOwner();
873  TUri uri;
874  TString srvs(exclude), s, srv, scheme, port;
875  Int_t from = 0;
876  while (srvs.Tokenize(s, from, ",")) {
877  uri.SetUri(s.Data());
878  scheme = "root";
879  port = "";
880  if (uri.GetScheme() != "") scheme = uri.GetScheme();
881  if (uri.GetPort() != "") port.Form(":%s", uri.GetPort().Data());
882  srv.Form("%s://%s%s", scheme.Data(), TUrl(s.Data()).GetHostFQDN(), port.Data());
883  // Add
884  excl->Add(new TObjString(srv.Data()));
885  }
886  }
887 
888  // Prepare the output
889  dsmap = new TMap();
890 
891  // Go through the list
892  TIter nxf(fList);
893  TFileInfo *fi = 0;
894  TUri uri;
895  TString key;
896  TFileCollection *fc = 0;
897  while ((fi = (TFileInfo *)nxf())) {
898  // Save current URL
899  TUrl *curl = fi->GetCurrentUrl();
900  // Loop over URLs
901  if (!curronly) fi->ResetUrl();
902  TUrl *xurl = 0;
903  while ((xurl = (curronly) ? curl : fi->NextUrl())) {
904  // Find the key for this server
905  key.Form("%s://%s", xurl->GetProtocol(), xurl->GetHostFQDN());
906  // Check if this has to be ignored
907  if (excl && excl->FindObject(key.Data())) {
908  if (curronly) break;
909  continue;
910  } else if (excl && xurl->GetPort() > 0) {
911  // Complete the key, if needed, and recheck
912  key += TString::Format(":%d", xurl->GetPort());
913  if (excl->FindObject(key.Data())) {
914  if (curronly) break;
915  continue;
916  }
917  }
918  // Get the map entry for this key
919  TPair *ent = 0;
920  if (!(ent = (TPair *) dsmap->FindObject(key.Data()))) {
921  // Create the TFileCollection
922  fc = new TFileCollection(GetName());
923  TString title;
924  if (GetTitle() && strlen(GetTitle()) > 0) {
925  title.Form("%s (subset on server %s)", GetTitle(), key.Data());
926  } else {
927  title.Form("subset of '%s' on server %s", GetName(), key.Data());
928  }
929  fc->SetTitle(title.Data());
930  // The default tree name
932  // Add it to the map
933  dsmap->Add(new TObjString(key.Data()), fc);
934  // Notify
935  if (gDebug > 0)
936  Info("GetFilesPerServer", "found server: '%s' (fc: %p)", key.Data(), fc);
937  } else {
938  // Attach to the TFileCollection
939  fc = (TFileCollection *) ent->Value();
940  }
941  // Create a new TFileInfo object
942  TFileInfo *nfi = new TFileInfo(xurl->GetUrl(kTRUE), fi->GetSize(),
943  fi->GetUUID() ? fi->GetUUID()->AsString() : 0,
944  fi->GetMD5() ? fi->GetMD5()->AsString() : 0);
945  if (fi->GetMetaDataList()) {
946  TIter nxm(fi->GetMetaDataList());
947  TFileInfoMeta *md = 0;
948  while ((md = (TFileInfoMeta *) nxm())) {
949  nfi->AddMetaData(new TFileInfoMeta(*md));
950  }
951  }
954  fc->Add(nfi);
955  // In current_only mode we are done
956  if (curronly) break;
957  }
958  // Restore current URL
959  fi->SetCurrentUrl(curl);
960  }
961 
962  // Fill up sums on the sub file collections
963  TIter nxk(dsmap);
964  TObject *k = 0;
965  while ((k = nxk()) && (fc = (TFileCollection *) dsmap->GetValue(k))) {
966  fc->Update();
967  // Fraction of total in permille
968  Long64_t xf = (fc->GetTotalSize() * 1000) / GetTotalSize();
969  TFileInfoMeta *m = new TFileInfoMeta("FractionOfTotal", "External Info", xf);
971  fc->AddMetaData(m);
972  }
973 
974  // Cleanup
975  if (excl) delete excl;
976 
977  // Done
978  return dsmap;
979 }
980 
981 ////////////////////////////////////////////////////////////////////////////////
982 /// Adds a meta data object to the file collection object. The object will be
983 /// adopted by the TFileCollection and should not be deleted by the user.
984 /// Typically objects of class TFileInfoMeta or derivatives should be added,
985 /// but any class is accepted.
986 ///
987 /// NB : a call to TFileCollection::Update will remove these objects unless the
988 /// bit TFileInfoMeta::kExternal is set.
989 /// Returns kTRUE if successful, kFALSE otherwise.
990 
992 {
993  if (meta) {
994  if (!fMetaDataList) {
995  fMetaDataList = new TList;
997  }
998  fMetaDataList->Add(meta);
999  return kTRUE;
1000  }
1001  return kFALSE;
1002 }
const char * GetDefaultTreeName() const
Returns the tree set with SetDefaultTreeName if set Returns the name of the first tree in the meta da...
virtual const char * BaseName(const char *pathname)
Base name of a file name. Base name of /user/root is root.
Definition: TSystem.cxx:928
virtual Int_t GetEntries() const
Definition: TCollection.h:92
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:52
Long64_t GetNFiles() const
ClassImp(TFileCollection) TFileCollection
TFileCollection constructor.
TFileCollection * GetFilesOnServer(const char *server)
Return the subset of files served by 'server'.
long long Long64_t
Definition: RtypesCore.h:69
void SetAnchor(const char *anchor)
Calls TUrl::SetAnchor() for all URLs contained in all TFileInfos.
const TString GetScheme() const
Definition: TUri.h:84
ClassImp(TSeqCollection) Int_t TSeqCollection TIter next(this)
Return index of object in collection.
Ssiz_t Length() const
Definition: TString.h:390
TLine * line
Collectable string class.
Definition: TObjString.h:32
float Float_t
Definition: RtypesCore.h:53
const char Option_t
Definition: RtypesCore.h:62
TObject * FindObject(const char *name) const
Find object using its name.
Definition: THashList.cxx:212
This class represents a WWW compatible URL.
Definition: TUrl.h:41
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
int GetPathInfo(const char *path, Long_t *id, Long_t *size, Long_t *flags, Long_t *modtime)
Get info about a file: id, size, flags, modification time.
Definition: TSystem.cxx:1363
virtual void SetName(const char *name)
Change (i.e.
Definition: TNamed.cxx:128
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
const char * GetProtocol() const
Definition: TUrl.h:73
TUrl * NextUrl()
Iterator function, start iteration by calling ResetUrl().
Definition: TFileInfo.cxx:258
void SetEntries(Long64_t entries)
Definition: TFileInfo.h:158
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:892
virtual void AddFirst(TObject *obj)
Add object at the beginning of the list.
Definition: TList.cxx:92
Regular expression class.
Definition: TRegexp.h:35
void Add(TObject *obj)
This function may not be used (but we need to provide it since it is a pure virtual in TCollection)...
Definition: TMap.cxx:52
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
virtual const char * DirName(const char *pathname)
Return the directory name in pathname.
Definition: TSystem.cxx:996
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
virtual TObject * FindObject(const char *name) const
Find an object in this list using its name.
Definition: TList.cxx:496
This class represents a RFC 3986 compatible URI.
Definition: TUri.h:39
Long64_t GetTotalEntries(const char *tree) const
Returns the number of entries for the specified tree (retrieved from meta data).
void SetIndex(Int_t idx)
Definition: TFileInfo.h:111
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:558
TString & Insert(Ssiz_t pos, const char *s)
Definition: TString.h:592
Bool_t R_ISREG(Int_t mode)
Definition: TSystem.h:129
void SetBit(UInt_t f, Bool_t set)
Set or unset the user status bits as specified in f.
Definition: TObject.cxx:732
TFileInfoMeta * GetMetaData(const char *meta=0) const
Returns the meta data object with the specified meta name.
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:625
Int_t fMode
Definition: TSystem.h:138
virtual void Sort(Bool_t order=kSortAscending)
Sort linked list.
Definition: TList.cxx:770
const char * Data() const
Definition: TString.h:349
Long64_t fNStagedFiles
static struct mg_connection * fc(struct mg_context *ctx)
Definition: civetweb.c:839
Float_t GetStagedPercentage() const
Int_t Update(Long64_t avgsize=-1)
Update accumulated information about the elements of the collection (e.g. fTotalSize).
Bool_t SetUri(const TString &uri)
Parse URI and set the member variables accordingly, returns kTRUE if URI validates, and kFALSE otherwise: URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty.
Definition: TUri.cxx:600
virtual const char * GetDirEntry(void *dirp)
Get a directory entry. Returns 0 if no more entries.
Definition: TSystem.cxx:847
void FormatSize(Long64_t bytes, TString &um, Double_t &size) const
Format size.
const TString GetPort() const
Definition: TUri.h:90
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2334
Bool_t AddMetaData(TObject *meta)
Adds a meta data object to the file info object.
Definition: TFileInfo.cxx:382
const char * AsString() const
Return message digest as string.
Definition: TMD5.cxx:218
THashList implements a hybrid collection class consisting of a hash table and a list to store TObject...
Definition: THashList.h:36
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:33
TFileCollection * GetStagedSubset()
Creates a subset of the files that have the kStaged & !kCorrupted bit set.
virtual Bool_t IsEmpty() const
Definition: TCollection.h:99
std::map< std::string, std::string >::const_iterator iter
Definition: TAlienJob.cxx:54
Bool_t SetCurrentUrl(const char *url)
Set 'url' as current URL, if in the list. Return kFALSE if not in the list.
Definition: TFileInfo.cxx:352
TObject * GetValue(const char *keyname) const
Returns a pointer to the value associated with keyname as name of the key.
Definition: TMap.cxx:234
void RemoveMetaData(const char *meta=0)
Removes the indicated meta data object in all TFileInfos and this object. If no name is given all meta...
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:918
TUrl * FindByUrl(const char *url, Bool_t withDeflt=kFALSE)
Find an element from a URL. Returns 0 if not found.
Definition: TFileInfo.cxx:274
Long64_t Merge(TCollection *list)
Merge all TFileCollection objects in 'list' into this TFileCollection object.
TObject * FindObject(const char *keyname) const
Check if a (key,value) pair exists with keyname as name of the key.
Definition: TMap.cxx:213
A doubly linked list.
Definition: TList.h:47
TList * GetMetaDataList() const
Definition: TFileInfo.h:94
void Sort(Bool_t useindex=kFALSE)
Sort the collection.
Int_t GetPort() const
Definition: TUrl.h:87
Bool_t AddMetaData(TObject *meta)
Adds a meta data object to the file collection object.
TObjString * ExportInfo(const char *name=0, Int_t popt=0)
Export the relevant info as a string; use 'name' as collection name, if defined, else use GetName()...
TFileInfoMeta * GetMetaData(const char *meta=0) const
Get meta data object with specified name.
Definition: TFileInfo.cxx:422
R__EXTERN TSystem * gSystem
Definition: TSystem.h:549
Int_t RemoveDuplicates()
Remove duplicates based on the UUID, typically after a verification.
THashList * GetList()
Int_t Add(TFileInfo *info)
Add TFileInfo to the collection.
virtual const char * ClassName() const
Returns name of class to which the object belongs.
Definition: TObject.cxx:187
virtual TObject * Remove(TObject *obj)
Remove object from the list.
Definition: TList.cxx:674
TObject * Next()
Definition: TCollection.h:158
TObject * Remove(TObject *obj)
Remove object from the list.
Definition: THashList.cxx:284
Collection abstract base class.
Definition: TCollection.h:48
const char * GetHostFQDN() const
Return fully qualified domain name of url host.
Definition: TUrl.cxx:467
TObject * Value() const
Definition: TMap.h:125
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2321
unsigned int UInt_t
Definition: RtypesCore.h:42
Bool_t TestBit(UInt_t f) const
Definition: TObject.h:173
TMarker * m
Definition: textangle.C:8
char * Form(const char *fmt,...)
Long64_t GetEntries() const
Definition: TFileInfo.h:151
const char * AsString() const
Return UUID as string. Copy string immediately since it will be reused.
Definition: TUUID.cxx:531
Bool_t IsTree() const
Definition: TFileInfo.h:154
Bool_t IsWhitespace() const
Definition: TString.h:388
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
TUUID * GetUUID() const
Definition: TFileInfo.h:92
Bool_t IsNull() const
Definition: TString.h:387
virtual void FreeDirectory(void *dirp)
Free a directory.
Definition: TSystem.cxx:839
std::istream & ReadToDelim(std::istream &str, char delim= '\n')
Read up to an EOF, or a delimiting character, whichever comes first.
Definition: Stringio.cxx:89
#define Printf
Definition: TGeoToOCC.h:18
THashList * fList
Long64_t GetTotalSize() const
const char * GetUrl(Bool_t withDeflt=kFALSE) const
Return full URL.
Definition: TUrl.cxx:385
virtual void Print(Option_t *option="") const
Default print for collections, calls Print(option, 1).
TFileCollection(const TFileCollection &)
Class used by TMap to store (key,value) pairs.
Definition: TMap.h:106
void SetAnchor(const char *anchor)
Definition: TUrl.h:95
virtual Int_t GetSize() const
Definition: TCollection.h:95
void SetDefaultTreeName(const char *treeName)
double f(double x)
double Double_t
Definition: RtypesCore.h:55
TMap implements an associative array of (key,value) pairs using a THashTable for efficient retrieval ...
Definition: TMap.h:44
int nentries
Definition: THbookFile.cxx:89
virtual void Clear(Option_t *option="")
Remove all objects from the list.
Definition: TList.cxx:348
void ResetUrl()
Definition: TFileInfo.h:80
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Definition: TPRegexp.cxx:704
#define name(a, b)
Definition: linkTestLib0.cpp:5
Mother of all ROOT objects.
Definition: TObject.h:58
void Print(Option_t *option="") const
Prints the contents of the TFileCollection.
Int_t AddFromFile(const char *file, Int_t nfiles=-1, Int_t firstfile=1)
Add file names contained in the specified text file.
void SetDefaultMetaData(const char *meta)
Moves the indicated meta data in the first position, so that it becomes effectively the default...
TUrl * GetCurrentUrl() const
Return the current url.
Definition: TFileInfo.cxx:246
Bool_t RemoveMetaData(const char *meta=0)
Remove the metadata object.
Definition: TFileInfo.cxx:399
Long64_t fNCorruptFiles
virtual void Add(TObject *obj)
Definition: TList.h:81
const Ssiz_t kNPOS
Definition: Rtypes.h:115
virtual ~TFileCollection()
Cleanup.
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Definition: TPRegexp.h:103
Class that contains a list of TFileInfo's and accumulated meta data information about its entries...
Float_t GetCorruptedPercentage() const
void ResetBitAll(UInt_t f)
Reset the bit for all TFileInfos.
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
virtual void * OpenDirectory(const char *name)
Open a directory. Returns 0 if directory does not exist.
Definition: TSystem.cxx:830
R__EXTERN Int_t gDebug
Definition: Rtypes.h:128
TMD5 * GetMD5() const
Definition: TFileInfo.h:93
Int_t GetIndex() const
Definition: TFileInfo.h:110
Class describing a generic file including meta information.
Definition: TFileInfo.h:50
void ResetBit(UInt_t f)
Definition: TObject.h:172
void PrintDetailed(TString &showOnly) const
Print detailed.
virtual Bool_t ExpandPathName(TString &path)
Expand a pathname getting rid of special shell characters like ~.
Definition: TSystem.cxx:1243
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:582
const Bool_t kTRUE
Definition: Rtypes.h:91
virtual void SetTitle(const char *title="")
Change (i.e. set) the title of the TNamed.
Definition: TNamed.cxx:152
TObject * obj
TMap * GetFilesPerServer(const char *exclude=0, Bool_t curronly=kFALSE)
Return a map of TFileCollections with the files on each data server, excluding servers in the comma-s...
void SetBitAll(UInt_t f)
Set the bit for all TFileInfos.
Long64_t GetSize() const
Definition: TFileInfo.h:91
void Resize(Ssiz_t n)
Resize the string. Truncate or add blanks as necessary.
Definition: TString.cxx:1058
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:904