Logo ROOT  
Reference Guide
TFileCollection.cxx
Go to the documentation of this file.
1// @(#)root/base:$Id$
2// Author: Gerhard Erich Bruckner, Jan Fiete Grosse-Oetringhaus 04/06/07
3
4/*************************************************************************
5 * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TFileCollection
13\ingroup Base
14
15Class that contains a list of TFileInfo's and accumulated meta
16data information about its entries. This class is used to describe
17file sets as stored by Grid file catalogs, by PROOF or any other
18collection of TFile names.
19*/
20
21#include "TFileCollection.h"
22#include "THashList.h"
23#include "TFileInfo.h"
24#include "TIterator.h"
25#include "TMap.h"
26#include "TObjString.h"
27#include "TUri.h"
28#include "TUrl.h"
29#include "TUUID.h"
30#include "TMD5.h"
31#include "TSystem.h"
32#include "TRegexp.h"
33#include "TPRegexp.h"
34#include "TError.h"
35
36#include <iostream>
37#include <fstream>
38
39
41
42////////////////////////////////////////////////////////////////////////////////
43/// TFileCollection constructor. Specify a name and title describing
44/// the list. If textfile is specified the file is opened and a
45/// TFileCollection is created containing the files in the textfile.
46
// Builds a named collection and optionally fills it from a text file.
//   name, title  - forwarded to the TNamed base
//   textfile     - forwarded to AddFromFile() together with nfiles and firstfile
47TFileCollection::TFileCollection(const char *name, const char *title,
48 const char *textfile, Int_t nfiles, Int_t firstfile)
49 : TNamed(name, title), fList(nullptr), fMetaDataList(nullptr), fDefaultTree(),
50 fTotalSize(0), fNFiles(0), fNStagedFiles(0), fNCorruptFiles(0)
51{
   // The file list is made owner of the entries stored in it.
52 fList = new THashList();
53 fList->SetOwner();
54
55 fMetaDataList = new TList;
   // NOTE(review): listing line 56 is absent from this extract - confirm what follows the TList creation.
57
   // AddFromFile() adds nothing when 'textfile' is null or empty.
58 AddFromFile(textfile, nfiles, firstfile);
59}
60
61////////////////////////////////////////////////////////////////////////////////
62/// Cleanup.
63
65{
66 delete fList;
67 delete fMetaDataList;
68}
69
70////////////////////////////////////////////////////////////////////////////////
71/// Add TFileInfo to the collection.
72
74{
75 if (fList && info) {
76 if (!fList->FindObject(info->GetName())) {
77 fList->Add(info);
78 if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
79 Update();
80 return 1;
81 } else {
82 Warning("Add", "file: '%s' already in the list - ignoring",
83 info->GetCurrentUrl()->GetUrl());
84 }
85 }
86 return 0;
87}
88
89////////////////////////////////////////////////////////////////////////////////
90/// Add content of the TFileCollection to this collection.
91
93{
94 if (fList && coll && coll->GetList()) {
95 TIter nxfi(coll->GetList());
96 TFileInfo *fi = nullptr;
97 while ((fi = (TFileInfo *) nxfi())) {
98 TFileInfo *info = new TFileInfo(*fi);
99 fList->Add(info);
100 if (fi->GetIndex() < 0) info->SetIndex(fList->GetSize());
101 }
102 Update();
103 return 1;
104 } else {
105 return 0;
106 }
107}
108
109////////////////////////////////////////////////////////////////////////////////
110/// Add file names contained in the specified text file.
111/// The file should contain one url per line; empty lines or lines starting with '#'
112/// (commented lines) are ignored.
113/// If nfiles > 0 only nfiles files are added, starting from file 'firstfile' (>= 1).
114/// The method returns the number of added files.
115
116Int_t TFileCollection::AddFromFile(const char *textfile, Int_t nfiles, Int_t firstfile)
117{
118 if (!fList)
119 return 0;
120
   // nf counts the entries actually added; it is the return value.
121 Int_t nf = 0;
122 TString fn(textfile);
   // Proceed only for a non-empty name for which ExpandPathName() returns a false value
   // (presumably its success code - confirm against TSystem).
123 if (!fn.IsNull() && !gSystem->ExpandPathName(fn)) {
124 std::ifstream f;
125 f.open(fn);
126 if (f.is_open()) {
   // nfiles <= 0 selects every usable line; in that mode 'firstfile' has no effect below.
127 Bool_t all = (nfiles <= 0) ? kTRUE : kFALSE;
   // ff is the 1-based position of the first usable line to take (clamped to >= 1 when not 'all').
128 Int_t ff = (!all && (firstfile < 1)) ? 1 : firstfile;
   // nn counts usable (non-blank, non-comment) lines seen so far.
129 Int_t nn = 0;
130 while (f.good() && (all || nf < nfiles)) {
   // NOTE(review): the declaration of 'line' (listing line 131) is absent from this extract.
132 line.ReadToDelim(f);
133 // Skip commented or empty lines
134 if (!line.IsWhitespace() && !line.BeginsWith("#")) {
135 nn++;
136 if (all || nn >= ff) {
   // Entries go straight into the list; no check against names already present is made here.
137 TFileInfo *info = new TFileInfo(line);
138 fList->Add(info);
139 if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
140 nf++;
141 }
142 }
143 }
144 f.close();
   // Refresh the accumulated counters once, after all lines were processed.
145 Update();
146 } else
147 Error("AddFromFile", "unable to open file %s (%s)", textfile, fn.Data());
148 }
149 return nf;
150}
151
152////////////////////////////////////////////////////////////////////////////////
153/// Add all files matching the specified pattern to the collection.
154/// 'dir' can include wildcards after the last slash, which causes all
155/// matching files in that directory to be added.
156/// If dir is the full path of a file, only one element is added.
157/// Return value is the number of added files.
158
// NOTE(review): the signature line (listing line 159) is absent from this extract; the body uses
// a C-string parameter named 'dir'.
160{
   // nf counts the files handed to Add(); it is the return value.
161 Int_t nf = 0;
162
163 if (!fList)
164 return nf;
165
166 if (!dir || !*dir) {
167 Error("Add", "input dir undefined");
168 return nf;
169 }
170
171 FileStat_t st;
172 FileStat_t tmp;
173 TString baseDir = gSystem->GetDirName(dir);
174 // if the 'dir' or its base dir exist
175 if (gSystem->GetPathInfo(dir, st) == 0 ||
176 gSystem->GetPathInfo(baseDir, tmp) == 0) {
177 // If 'dir' points to a single file, add to the list and exit
178 if (R_ISREG(st.fMode)) {
179 // regular, single file
180 TFileInfo *info = new TFileInfo(dir);
   // NOTE(review): listing line 181 is absent from this extract.
   // NOTE(review): nf is incremented whatever Add(info) returns; Add(TFileInfo*) returns 0 without
   // storing a duplicate, so 'info' may leak and the count may be too high - confirm.
182 Add(info);
183 nf++;
184 Update();
185 return nf;
186 } else {
   // Not a regular file: scan the parent directory of 'dir' and match entries against its last component.
187 void *dataSetDir = gSystem->OpenDirectory(gSystem->GetDirName(dir).Data());
188 if (!dataSetDir) {
189 // directory cannot be opened
190 Error("Add", "directory %s cannot be opened",
191 gSystem->GetDirName(dir).Data());
192 } else {
193 const char *ent;
   // Build an anchored expression from the base name, turning each '*' into '.*'.
194 TString filesExp(TString("^") + gSystem->BaseName(dir) + "$");
195 filesExp.ReplaceAll("*",".*");
196 TRegexp rg(filesExp);
197 while ((ent = gSystem->GetDirEntry(dataSetDir))) {
198 TString entryString(ent);
199 if (entryString.Index(rg) != kNPOS) {
200 // matching dir entry
201 TString fn = gSystem->GetDirName(dir);
202 fn += "/";
203 fn += ent;
   // The return value of GetPathInfo() is not checked here; 'st' is reused from above.
204 gSystem->GetPathInfo(fn, st);
205 if (R_ISREG(st.fMode)) {
206 // regular file
207 TFileInfo *info = new TFileInfo(fn);
   // NOTE(review): listing line 208 is absent from this extract; same counting caveat as above.
209 Add(info);
210 nf++;
211 }
212 }
213 }
214 // close the directory
215 gSystem->FreeDirectory(dataSetDir);
216 Update();
217 }
218 }
219 }
220 return nf;
221}
222
223////////////////////////////////////////////////////////////////////////////////
224/// Remove duplicates based on the UUID, typically after a verification.
225/// Return the number of entries removed.
226
228{
229 THashList *hl = new THashList;
230 hl->SetOwner();
231
232 Int_t n0 = fList->GetSize();
233 TIter nxfi(fList);
234 TFileInfo *fi = nullptr;
235 while ((fi = (TFileInfo *)nxfi())) {
236 if (!(hl->FindObject(fi->GetUUID()->AsString()))) {
237 // We hash on the UUID
238 fList->Remove(fi);
239 fi->SetName(fi->GetUUID()->AsString());
240 hl->Add(fi);
241 }
242 }
243 delete fList;
244 fList = hl;
245 // How many removed?
246 Int_t nr = n0 - fList->GetSize();
247 if (nr > 0)
248 Info("RemoveDuplicates", "%d duplicates found and removed", nr);
249 // Done
250 return nr;
251}
252
253////////////////////////////////////////////////////////////////////////////////
254/// Creates a subset of the files that have the kStaged & !kCorrupted bit set.
255
// NOTE(review): the signature line (listing line 256) is absent from this extract.
257{
258 if (!fList)
259 return nullptr;
260
   // NOTE(review): the declaration/creation of 'subset' (listing line 261) is absent from this extract.
262
263 TIter iter(fList);
264 TFileInfo *fileInfo = nullptr;
265 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
   // Select entries flagged kStaged and not flagged kCorrupted.
   // NOTE(review): the same TFileInfo pointer (not a copy) is passed to subset->Add(); if both
   // lists own their entries this could lead to a double delete - confirm.
266 if (fileInfo->TestBit(TFileInfo::kStaged) && !fileInfo->TestBit(TFileInfo::kCorrupted))
267 subset->Add(fileInfo);
268 }
269
270 subset->Update();
271
272 return subset;
273}
274
275////////////////////////////////////////////////////////////////////////////////
276/// Merge all TFileCollection objects in li into this TFileCollection object.
277/// Updates counters at the end.
278/// Returns the number of merged collections or -1 in case of error.
279
// NOTE(review): the signature line (listing line 280) is absent from this extract; the body
// iterates a collection pointer named 'li'.
281{
282
   // A null or empty input list is not treated as an error: 0 is returned.
283 if (!li) return 0;
284 if (li->IsEmpty()) return 0;
285
   // NOTE(review): the declaration of 'nentries' (listing line 286) is absent from this extract.
287 TIter next(li);
288 while (TObject *o = next()) {
289 TFileCollection* coll = dynamic_cast<TFileCollection*> (o);
   // Any object that is not a TFileCollection aborts the merge with -1;
   // collections added before it stay merged.
290 if (!coll) {
291 Error("Add", "attempt to add object of class: %s to a %s",
292 o->ClassName(),this->ClassName());
293 return -1;
294 }
295 Add(coll);
296 nentries++;
297 }
298 Update();
299
300 return nentries;
301}
302
303////////////////////////////////////////////////////////////////////////////////
304/// Update accumulated information about the elements of the collection
305/// (e.g. fTotalSize). If 'avgsize' > 0, use an average file size of 'avgsize'
306/// bytes when the size info is not available.
307/// Also updates the meta data information by summarizing
308/// the meta data of the contained objects.
309/// Return -1 in case of any failure, 0 if the total size is exact, 1 if
310/// incomplete, 2 if complete but (at least partially) estimated.
311
// NOTE(review): the signature line (listing line 312) is absent from this extract; the body
// reads a numeric parameter named 'avgsize'.
313{
314 if (!fList)
315 return -1;
316
   // rc: 0 = every entry had a size, 1 = some size missing, 2 = missing sizes replaced by avgsize.
317 Int_t rc = 0;
318
319 fTotalSize = 0;
320 fNStagedFiles = 0;
321 fNCorruptFiles = 0;
322
323 // Clear internal meta information which is going to be rebuilt in this
324 // function
325 TIter nxm(fMetaDataList);
326 TFileInfoMeta *m = nullptr;
327 while ((m = (TFileInfoMeta *)nxm())) {
   // Only entries without the kExternal bit are discarded.
328 if (!(m->TestBit(TFileInfoMeta::kExternal))) {
   // NOTE(review): listing line 329 is absent from this extract (presumably detaches 'm' from the list before the delete - confirm).
330 delete m;
331 }
332 }
333
   // NOTE(review): listing line 334 is absent from this extract.
335
336 TIter iter(fList);
337 TFileInfo *fileInfo = nullptr;
338 while ((fileInfo = dynamic_cast<TFileInfo*> (iter.Next()))) {
339
   // Accumulate the size; entries with size <= 0 count as 'size unknown'.
340 if (fileInfo->GetSize() > 0) {
341 fTotalSize += fileInfo->GetSize();
342 } else {
343 rc = 1;
344 if (avgsize > 0) {
345 rc = 2;
346 fTotalSize += avgsize;
347 }
348 }
349
   // Meta data is summed only over entries flagged kStaged and not kCorrupted.
350 if (fileInfo->TestBit(TFileInfo::kStaged) && !fileInfo->TestBit(TFileInfo::kCorrupted)) {
   // NOTE(review): listing line 351 is absent from this extract.
352
353 if (fileInfo->GetMetaDataList()) {
354 TIter metaDataIter(fileInfo->GetMetaDataList());
355 // other than TFileInfoMeta is also allowed in list
356 TObject *obj = nullptr;
357 while ((obj = metaDataIter.Next())) {
358 TFileInfoMeta *metaData = dynamic_cast<TFileInfoMeta*>(obj);
359 if (!metaData)
360 continue;
361 if (!metaData->IsTree())
362 continue;
363
364 // find corresponding entry in TFileCollection's meta data
365 TFileInfoMeta *metaDataSum = dynamic_cast<TFileInfoMeta*>(fMetaDataList->FindObject(metaData->GetName()));
366 Bool_t newObj = kFALSE;
367 if (!metaDataSum) {
368 // create explicitly, there are some values that do not make sense for the sum
369 metaDataSum = new TFileInfoMeta(metaData->GetName(), metaData->GetTitle());
370 fMetaDataList->Add(metaDataSum);
371 newObj = kTRUE;
372 }
373
374 // sum the values
375 if (newObj)
376 metaDataSum->SetEntries(metaData->GetEntries());
377 else
378 metaDataSum->SetEntries(metaDataSum->GetEntries() + metaData->GetEntries());
379 }
380 }
381 }
   // NOTE(review): the statement controlled by the following 'if' (listing line 383) is absent from this extract.
382 if (fileInfo->TestBit(TFileInfo::kCorrupted))
384 }
385
386 // Done
387 return rc;
388}
389
390////////////////////////////////////////////////////////////////////////////////
391/// Prints the contents of the TFileCollection.
392/// If option contains:
393///
394/// - 'M' print global meta information
395/// - 'F' print all the files in the collection in compact form
396/// (current url, default tree name|class|entries, md5)
397/// - 'L' together with 'F', print all the files in the collection
398/// in long form (uuid, md5, all URLs, all meta objects; on
399/// many lines)
400/// - "filter:[SsCc]" invokes PrintDetailed() which prints out dataset
401/// content in a formatted fashion by filtering on files
402/// which are (S)taged or not (s), (C)orrupted or not (c)
403
// NOTE(review): the signature line (listing line 404) is absent from this extract; the body
// reads a parameter named 'option'.
405{
406 TString opt(option);
   // A "filter:" token delimited by start/end, ';' or a blank switches to the detailed printout.
407 TPMERegexp re("(^|;| )filter:([SsCc]+)( |;|$)", 4);
408 if (re.Match(option) == 4) {
   // Capture group 2 holds the filter letters.
409 TString showOnly = re[2];
410 PrintDetailed(showOnly);
411 return;
412 }
413
   // NOTE(review): the argument lines of this Printf call (listing lines 416-417) are absent from this extract.
414 Printf("TFileCollection %s - %s contains: %lld files with a size of"
415 " %lld bytes, %.1f %% staged - default tree name: '%s'",
418
   // 'M' (case-insensitive): list the tree-type entries of the collection meta data.
419 if (opt.Contains("M", TString::kIgnoreCase)) {
420 Printf("The files contain the following trees:");
421
422 TIter metaDataIter(fMetaDataList);
423 TFileInfoMeta* metaData = nullptr;
424 while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
425 if (!metaData->IsTree())
426 continue;
427
428 Printf("Tree %s: %lld events", metaData->GetName(), metaData->GetEntries());
429 }
430 }
431
   // 'F' (case-insensitive): print the file list; unless an uppercase 'L' is present,
   // the default tree name is appended to the option string as " T:<name>".
432 if (fList && opt.Contains("F", TString::kIgnoreCase)) {
433 Printf("The collection contains the following files:");
434 if (!opt.Contains("L") && !fDefaultTree.IsNull())
435 opt += TString::Format(" T:%s", fDefaultTree.Data());
436 fList->Print(opt);
437 }
438}
439
440////////////////////////////////////////////////////////////////////////////////
441/// Print detailed.
442
// NOTE(review): the signature line (listing line 443) is absent from this extract; the body
// reads a TString named 'showOnly' holding the filter letters.
444{
   // bS/bs: show staged / not staged entries; bC/bc: show corrupted / not corrupted entries.
445 Bool_t bS, bs, bC, bc;
446 bS = bs = bC = bc = kFALSE;
447
448 if (showOnly.Index('S') >= 0) bS = kTRUE;
449 if (showOnly.Index('s') >= 0) bs = kTRUE;
450 if (showOnly.Index('C') >= 0) bC = kTRUE;
451 if (showOnly.Index('c') >= 0) bc = kTRUE;
452
453 // If Ss (or Cc) omitted, show both Ss (or Cc)
454 if (!bc && !bC) bc = bC = kTRUE;
455 if (!bs && !bS) bs = bS = kTRUE;
456
457 TIter it(fList);
458 TFileInfo *info;
   // countAll: entries visited; countMatch: entries passing the filters.
459 UInt_t countAll = 0;
460 UInt_t countMatch = 0;
461
462 Printf("\033[1m #. SC | Entries | Size | URL\033[m");
463
   // um / sz receive the unit label and the scaled value from FormatSize().
464 TString um;
465 Double_t sz;
466
467 while ((info = dynamic_cast<TFileInfo *>(it.Next()))) {
468
   // NOTE(review): the declarations of 's' and 'c' (listing lines 469-470) are absent from this
   // extract; from their use below they look like the staged / corrupted flags of 'info' - confirm.
471
472 TUrl *url;
473
474 countAll++;
475
476 if ( ((s && bS) || (!s && bs)) && ((c && bC) || (!c && bc)) ) {
477
478 TFileInfoMeta *meta = info->GetMetaData(); // gets the first one
   // NOTE(review): GetEntries() is declared as Long64_t in the member index; storing it in an
   // Int_t may truncate large values - confirm.
479 Int_t entries = -1;
480
481 if (meta) entries = meta->GetEntries();
482
483 FormatSize(info->GetSize(), um, sz);
484
485 // First line: current URL with all information
486 info->ResetUrl();
487 TUrl *curUrl = info->GetCurrentUrl();
488 const char *curUrlStr = curUrl ? curUrl->GetUrl() : "n.a.";
489 Printf("\033[1m%4u.\033[m %c%c | %-7s | %6.1lf %s | %s",
490 ++countMatch,
491 (s ? 'S' : 's'), (c ? 'C' : 'c'),
492 ((entries > 0) ? Form("% 7d", entries) : "n.a."),
493 sz, um.Data(), curUrlStr);
   // Step past the URL just printed so the loop below starts with the next one.
494 info->NextUrl();
495
496 // Every other URL shown below current one
497 while ((url = info->NextUrl())) {
498 Printf(" | | | %s", url->GetUrl());
499 }
500 info->ResetUrl();
501
502 } // end match filters
503
504 } // end loop over entries
505
506 if (countAll) {
507
508 Printf(">> There are \033[1m%u\033[m file(s) in dataset: "
509 "\033[1m%u (%5.1f%%)\033[m matched your criteria (%s)",
510 countAll, countMatch,
511 100.*(Float_t)countMatch/(Float_t)countAll, showOnly.Data());
512
   // NOTE(review): listing line 513 is absent from this extract (presumably recomputes um/sz for the total size - confirm).
514 Printf(">> Total size : \033[1m%.1f %s\033[m", sz, um.Data());
515 Printf(">> Staged (S) : \033[1m%5.1f %%\033[m", GetStagedPercentage());
   // NOTE(review): the argument line of the next Printf (listing line 517) is absent from this extract.
516 Printf(">> Corrupted (C) : \033[1m%5.1f %%\033[m",
518
519 }
520 else {
521 Printf(">> No files in dataset");
522 }
523
524 const char *treeName = GetDefaultTreeName();
525 Printf(">> Default tree : \033[1m%s\033[m",
526 (treeName ? treeName : "(no default tree)"));
527
528}
529
530////////////////////////////////////////////////////////////////////////////////
531/// Format size.
532
534 Double_t &size) const
535{
536 static const char *ums[] = { "byt", "KiB", "MiB", "GiB", "TiB" };
537 Int_t maxDiv = sizeof(ums)/sizeof(const char *);
538 Int_t nDiv = 0;
539 Double_t b = bytes;
540
541 while ((b >= 1024.) && (nDiv+1 < maxDiv)) {
542 b /= 1024.;
543 nDiv++;
544 }
545
546 um = ums[nDiv];
547 size = b;
548}
549
550////////////////////////////////////////////////////////////////////////////////
551/// Calls TUrl::SetAnchor() for all URLs contained in all TFileInfos.
552
553void TFileCollection::SetAnchor(const char *anchor)
554{
555 if (!fList)
556 return;
557
558 TIter iter(fList);
559 TFileInfo *fileInfo = nullptr;
560 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
561 fileInfo->ResetUrl();
562 TUrl *url = nullptr;
563 while ((url = fileInfo->NextUrl()))
564 url->SetAnchor(anchor);
565 fileInfo->ResetUrl();
566 }
567}
568
569////////////////////////////////////////////////////////////////////////////////
570/// Set the bit for all TFileInfos
571
573{
574 if (!fList)
575 return;
576
577 TIter iter(fList);
578 TFileInfo *fileInfo = nullptr;
579 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
580 fileInfo->SetBit(f);
581}
582
583////////////////////////////////////////////////////////////////////////////////
584/// Reset the bit for all TFileInfos
585
587{
588 if (!fList)
589 return;
590
591 TIter iter(fList);
592 TFileInfo *fileInfo = nullptr;
593 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
594 fileInfo->ResetBit(f);
595}
596
597////////////////////////////////////////////////////////////////////////////////
598/// Returns the tree set with SetDefaultTreeName if set
599/// Returns the name of the first tree in the meta data list.
600/// Returns 0 in case no trees are found in the meta data list.
601
603{
604 if (fDefaultTree.Length() > 0)
605 return fDefaultTree;
606
607 TIter metaDataIter(fMetaDataList);
608 TFileInfoMeta *metaData = nullptr;
609 while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
610 if (!metaData->IsTree())
611 continue;
612 return metaData->GetName();
613 }
614 return nullptr;
615}
616
617////////////////////////////////////////////////////////////////////////////////
618/// Returns the number of entries for the specified tree (retrieved from meta data).
619/// If tree is not specified, use the default tree name.
620/// Returns -1 in case the specified tree is not found.
621
// NOTE(review): the signature line (listing line 622) is absent from this extract; the body
// reads a C-string parameter named 'tree'.
623{
   // A null or empty name triggers the fallback below.
624 if (!tree || !*tree) {
   // NOTE(review): listing line 625 is absent from this extract (presumably assigns the default tree name to 'tree' - confirm).
626 if (!tree)
627 return -1;
628 }
629
   // Look the tree up by name in the collection-level meta data; -1 when it is not found.
630 TFileInfoMeta *metaData = dynamic_cast<TFileInfoMeta*>(fMetaDataList->FindObject(tree));
631 if (!metaData)
632 return -1;
633
634 return metaData->GetEntries();
635}
636
637////////////////////////////////////////////////////////////////////////////////
638/// Returns the meta data object with the specified meta name.
639/// Returns 0 in case specified meta data is not found.
640
642{
643 if (!meta || !*meta)
644 return nullptr;
645
646 return dynamic_cast<TFileInfoMeta*>(fMetaDataList->FindObject(meta));
647}
648
649////////////////////////////////////////////////////////////////////////////////
650/// Moves the indicated meta data in the first position, so that
651/// it becomes effectively the default.
652
// NOTE(review): the signature line (listing line 653) is absent from this extract; the body
// reads a C-string parameter named 'meta'.
654{
   // Nothing happens when no meta data object with that name exists.
655 TFileInfoMeta *fim = GetMetaData(meta);
656 if (fim) {
657 fMetaDataList->Remove(fim);
   // NOTE(review): listing line 658 is absent from this extract (presumably re-inserts 'fim' at the front of the list - confirm).
659 }
660}
661
662////////////////////////////////////////////////////////////////////////////////
663/// Removes the indicated meta data object in all TFileInfos and this object
664/// If no name is given all metadata is removed
665
667{
668 if (fList) {
669 TIter iter(fList);
670 TFileInfo *fileInfo = nullptr;
671 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
672 fileInfo->RemoveMetaData(meta);
673 }
674
675 if (meta) {
676 TObject* obj = fMetaDataList->FindObject("meta");
677 if (obj) {
678 fMetaDataList->Remove(obj);
679 delete obj;
680 }
681 } else
683}
684
685////////////////////////////////////////////////////////////////////////////////
686/// Sort the collection.
687
// NOTE(review): the signature line (listing line 688) is absent from this extract; the body
// reads a boolean parameter named 'useindex'.
689{
690 if (!fList)
691 return;
692
693 // Make sure the relevant bit has the wanted value
   // NOTE(review): the statements of both branches (listing lines 695 and 697) are absent from this extract.
694 if (useindex) {
696 } else {
698 }
699
   // The ordering itself is delegated to the list's own Sort().
700 fList->Sort();
701}
702
703////////////////////////////////////////////////////////////////////////////////
704/// Export the relevant info as a string; use 'name' as collection name,
705/// if defined, else use GetName().
706/// The output object must be destroyed by the caller
707
// NOTE(review): the signature line (listing line 708) is absent from this extract; the body
// reads parameters named 'name' and 'popt'.
709{
   // treeInfo: description of the default tree, formatted according to 'popt'.
710 TString treeInfo;
711 if (GetDefaultTreeName()) {
   // NOTE(review): the declaration of 'meta' (listing line 712) is absent from this extract.
713 if (popt == 1) {
   // Free-form text: name, entries and, when available, the "/FractionOfTotal" value divided by 10.
714 treeInfo = GetDefaultTreeName();
715 if (meta)
716 treeInfo += TString::Format(", %lld entries", meta->GetEntries());
717 TFileInfoMeta *frac = GetMetaData("/FractionOfTotal");
718 if (frac)
719 treeInfo += TString::Format(", %3.1f %% of total", frac->GetEntries() / 10.);
720 } else {
   // Column layout: name cut to 14 characters, with '>' at position 13 marking a truncation.
721 treeInfo.Form(" %s ", GetDefaultTreeName());
722 if (treeInfo.Length() > 14) treeInfo.Replace(13, 1, '>');
723 treeInfo.Resize(14);
724 if (meta) {
   // Counts above 99999999 are printed as integers, smaller ones with %8.4g.
725 if (meta->GetEntries() > 99999999) {
726 treeInfo += TString::Format("| %8lld ", meta->GetEntries());
727 } else {
728 treeInfo += TString::Format("| %8.4g ", (Double_t) meta->GetEntries());
729 }
730 }
731 }
732 } else {
733 treeInfo = " N/A";
734 }
735 if (popt == 0) treeInfo.Resize(25);
736
737 // Renormalize the size to kB, MB or GB
   // The divisor grows by a factor 1024 per step, for at most three steps (up to "TB").
738 const char *unit[4] = {"kB", "MB", "GB", "TB"};
739 Int_t k = 0;
740 Long64_t refsz = 1024;
741 Long64_t xsz = (Long64_t) (GetTotalSize() / refsz);
742 while (xsz > 1024 && k < 3) {
743 k++;
744 refsz *= 1024;
745 xsz = (Long64_t) (GetTotalSize() / refsz);
746 }
747
748 // The name
749 TString dsname(name);
750 if (dsname.IsNull()) dsname = GetName();
751
752 // Create the output string
   // The TObjString is allocated here and returned to the caller.
753 TObjString *outs = nullptr;
754 if (popt == 1) {
755 outs = new TObjString(Form("%s %lld files, %lld %s, staged %d %%, tree: %s", dsname.Data(),
756 GetNFiles(), xsz, unit[k],
757 (Int_t)GetStagedPercentage(), treeInfo.Data()));
758 } else {
   // NOTE(review): the last argument line of this call (listing line 761) is absent from this extract.
759 outs = new TObjString(Form("%s| %7lld |%s| %5lld %s | %3d %%", dsname.Data(),
760 GetNFiles(), treeInfo.Data(), xsz, unit[k],
762 }
763 // Done
764 return outs;
765}
766
767////////////////////////////////////////////////////////////////////////////////
768/// Return the subset of files served by 'server'. The syntax for 'server' is
769/// the standard URI one, i.e. `[<scheme>://]<host>[:port]`
770
// NOTE(review): the signature line (listing line 771) is absent from this extract; the body
// reads a C-string parameter named 'server'.
// Returns a newly allocated TFileCollection, or nullptr when nothing matches or the input is unusable.
772{
773 TFileCollection *fc = nullptr;
774
775 // Server specification is mandatory
776 if (!server || strlen(server) <= 0) {
777 Info("GetFilesOnServer", "server undefined - do nothing");
778 return fc;
779 }
780
781 // Nothing to do for empty lists
782 if (!fList || fList->GetSize() <= 0) {
783 Info("GetFilesOnServer", "the list is empty - do nothing");
784 return fc;
785 }
786
787 // Define the server reference string
   // Scheme defaults to "root"; the port part is added only when the URI carries one.
788 TUri uri(server);
789 TString srv, scheme("root"), port;
790 if (uri.GetScheme() != "") scheme = uri.GetScheme();
791 if (uri.GetPort() != "") port.Form(":%s", uri.GetPort().Data());
792 srv.Form("%s://%s%s", scheme.Data(), TUrl(server).GetHostFQDN(), port.Data());
793 if (gDebug > 0)
794 Info("GetFilesOnServer", "searching for files on server: '%s' (input: '%s')",
795 srv.Data(), server);
796
797 // Prepare the output
798 fc = new TFileCollection(GetName());
799 TString title;
800 if (GetTitle() && strlen(GetTitle()) > 0) {
801 title.Form("%s (subset on server %s)", GetTitle(), srv.Data());
802 } else {
803 title.Form("subset of '%s' on server %s", GetName(), srv.Data());
804 }
805 fc->SetTitle(title.Data());
806 // The default tree name
807 fc->SetDefaultTreeName(GetDefaultTreeName());
808
809 // We look for URL starting with srv
   // NOTE(review): the leading '^' presumably acts as a start anchor inside FindByUrl() - confirm.
810 srv.Insert(0, "^");
811
812 // Go through the list
813 TIter nxf(fList);
814 TFileInfo *fi = nullptr;
815 while ((fi = (TFileInfo *)nxf())) {
816 TUrl *xu = nullptr;
817 if ((xu = fi->FindByUrl(srv.Data()))) {
818 // Create a new TFileInfo object
   // The copy carries the matching URL, the size, and the UUID / MD5 strings when present.
819 TFileInfo *nfi = new TFileInfo(xu->GetUrl(), fi->GetSize(),
820 fi->GetUUID() ? fi->GetUUID()->AsString() : nullptr,
821 fi->GetMD5() ? fi->GetMD5()->AsString() : nullptr);
822 if (fi->GetMetaDataList()) {
823 TIter nxm(fi->GetMetaDataList());
824 TFileInfoMeta *md = nullptr;
825 while ((md = (TFileInfoMeta *) nxm())) {
826 nfi->AddMetaData(new TFileInfoMeta(*md));
827 }
828 }
   // NOTE(review): listing lines 829-830 are absent from this extract.
831 if (gDebug > 1)
832 Info("GetFilesOnServer", "adding: %s", xu->GetUrl());
833 fc->Add(nfi);
834 }
835 }
836
837 // If nothing found, delete the object
838 if (fc->GetList()->GetSize() <= 0) {
839 delete fc;
840 fc = nullptr;
841 Info("GetFilesOnServer", "dataset '%s' has no files on server: '%s' (searched for: '%s')",
842 GetName(), server, srv.Data());
843 }
844
845 // Fill up sums on the sub file collection
846 if (fc) {
847 fc->Update();
848 // Fraction of total in permille
   // NOTE(review): integer division by GetTotalSize(); if this collection's total is 0
   // (e.g. no size information available) this divides by zero - confirm and guard.
849 Long64_t xf = (fc->GetTotalSize() * 1000) / GetTotalSize();
850 TFileInfoMeta *m = new TFileInfoMeta("FractionOfTotal", "External Info", xf);
   // NOTE(review): listing line 851 is absent from this extract.
852 fc->AddMetaData(m);
853 }
854
855 // Done
856 return fc;
857}
858
859////////////////////////////////////////////////////////////////////////////////
860/// Return a map of TFileCollections with the files on each data server,
861/// excluding servers in the comma-separated list 'exclude'.
862/// If curronly is kTRUE, only the URL flagged as current in the TFileInfo
863/// are considered.
864
// Returns a newly allocated TMap keyed by server string (TObjString) with one TFileCollection
// per server as value, or nullptr when the file list is missing or empty.
865TMap *TFileCollection::GetFilesPerServer(const char *exclude, Bool_t curronly)
866{
867 TMap *dsmap = nullptr;
868
869 // Nothing to do for empty lists
870 if (!fList || fList->GetSize() <= 0) {
871 Info("GetFilesPerServer", "the list is empty - do nothing");
872 return dsmap;
873 }
874
875 // List of servers to be ignored
   // Each token of 'exclude' is normalized to "<scheme>://<fqdn>[:port]", scheme defaulting to "root".
876 THashList *excl = nullptr;
877 if (exclude && strlen(exclude) > 0) {
878 excl = new THashList;
879 excl->SetOwner();
880 TUri uri;
881 TString srvs(exclude), s, srv, scheme, port;
882 Int_t from = 0;
883 while (srvs.Tokenize(s, from, ",")) {
884 uri.SetUri(s.Data());
885 scheme = "root";
886 port = "";
887 if (uri.GetScheme() != "") scheme = uri.GetScheme();
888 if (uri.GetPort() != "") port.Form(":%s", uri.GetPort().Data());
889 srv.Form("%s://%s%s", scheme.Data(), TUrl(s.Data()).GetHostFQDN(), port.Data());
890 // Add
891 excl->Add(new TObjString(srv.Data()));
892 }
893 }
894
895 // Prepare the output
896 dsmap = new TMap();
897
898 // Go through the list
899 TIter nxf(fList);
900 TFileInfo *fi = nullptr;
901 TUri uri;
902 TString key;
903 TFileCollection *fc = nullptr;
904 while ((fi = (TFileInfo *)nxf())) {
905 // Save current URL
906 TUrl *curl = fi->GetCurrentUrl();
907 // Loop over URLs
908 if (!curronly) fi->ResetUrl();
909 TUrl *xurl = nullptr;
   // With 'curronly' the loop body runs at most once (every path below breaks); otherwise all URLs are visited.
910 while ((xurl = (curronly) ? curl : fi->NextUrl())) {
911 // Find the key for this server
912 key.Form("%s://%s", xurl->GetProtocol(), xurl->GetHostFQDN());
913 // Check if this has to be ignored
   // The exclusion list is checked first without the port, then with the port appended.
   // The port is appended to 'key' only when an exclusion list exists, so map keys differ
   // in form depending on whether 'exclude' was given.
914 if (excl && excl->FindObject(key.Data())) {
915 if (curronly) break;
916 continue;
917 } else if (excl && xurl->GetPort() > 0) {
918 // Complete the key, if needed, and recheck
919 key += TString::Format(":%d", xurl->GetPort());
920 if (excl->FindObject(key.Data())) {
921 if (curronly) break;
922 continue;
923 }
924 }
925 // Get the map entry for this key
926 TPair *ent = nullptr;
927 if (!(ent = (TPair *) dsmap->FindObject(key.Data()))) {
928 // Create the TFileCollection
929 fc = new TFileCollection(GetName());
930 TString title;
931 if (GetTitle() && strlen(GetTitle()) > 0) {
932 title.Form("%s (subset on server %s)", GetTitle(), key.Data());
933 } else {
934 title.Form("subset of '%s' on server %s", GetName(), key.Data());
935 }
936 fc->SetTitle(title.Data());
937 // The default tree name
938 fc->SetDefaultTreeName(GetDefaultTreeName());
939 // Add it to the map
940 dsmap->Add(new TObjString(key.Data()), fc);
941 // Notify
942 if (gDebug > 0)
943 Info("GetFilesPerServer", "found server: '%s' (fc: %p)", key.Data(), fc);
944 } else {
945 // Attach to the TFileCollection
946 fc = (TFileCollection *) ent->Value();
947 }
948 // Create a new TFileInfo object
949 TFileInfo *nfi = new TFileInfo(xurl->GetUrl(kTRUE), fi->GetSize(),
950 fi->GetUUID() ? fi->GetUUID()->AsString() : nullptr,
951 fi->GetMD5() ? fi->GetMD5()->AsString() : nullptr);
952 if (fi->GetMetaDataList()) {
953 TIter nxm(fi->GetMetaDataList());
954 TFileInfoMeta *md = nullptr;
955 while ((md = (TFileInfoMeta *) nxm())) {
956 nfi->AddMetaData(new TFileInfoMeta(*md));
957 }
958 }
   // NOTE(review): listing lines 959-960 are absent from this extract.
961 fc->Add(nfi);
962 // In current_only mode we are done
963 if (curronly) break;
964 }
965 // Restore current URL
966 fi->SetCurrentUrl(curl);
967 }
968
969 // Fill up sums on the sub file collections
970 TIter nxk(dsmap);
971 TObject *k = nullptr;
972 while ((k = nxk()) && (fc = (TFileCollection *) dsmap->GetValue(k))) {
973 fc->Update();
974 // Fraction of total in permille
   // NOTE(review): integer division by GetTotalSize(); if this collection's total is 0
   // (e.g. no size information available) this divides by zero - confirm and guard.
975 Long64_t xf = (fc->GetTotalSize() * 1000) / GetTotalSize();
976 TFileInfoMeta *m = new TFileInfoMeta("FractionOfTotal", "External Info", xf);
   // NOTE(review): listing line 977 is absent from this extract.
978 fc->AddMetaData(m);
979 }
980
981 // Cleanup
982 if (excl) delete excl;
983
984 // Done
985 return dsmap;
986}
987
988////////////////////////////////////////////////////////////////////////////////
989/// Adds a meta data object to the file collection object. The object will be
990/// adopted by the TFileCollection and should not be deleted by the user.
991/// Typically objects of class TFileInfoMeta or derivatives should be added,
992/// but any class is accepted.
993///
994/// NB : a call to TFileCollection::Update will remove these objects unless the
995/// bit TFileInfoMeta::kExternal is set.
996/// Returns kTRUE if successful, kFALSE otherwise.
997
// NOTE(review): the signature line (listing line 998) is absent from this extract; the body
// reads an object pointer named 'meta'.
999{
   // A null object is rejected with kFALSE.
1000 if (meta) {
   // The meta data list is created on demand when it does not exist yet.
1001 if (!fMetaDataList) {
1002 fMetaDataList = new TList;
   // NOTE(review): listing line 1003 is absent from this extract.
1004 }
1005 fMetaDataList->Add(meta);
1006 return kTRUE;
1007 }
1008 return kFALSE;
1009}
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
const Ssiz_t kNPOS
Definition: RtypesCore.h:124
int Int_t
Definition: RtypesCore.h:45
unsigned int UInt_t
Definition: RtypesCore.h:46
const Bool_t kFALSE
Definition: RtypesCore.h:101
bool Bool_t
Definition: RtypesCore.h:63
double Double_t
Definition: RtypesCore.h:59
long long Long64_t
Definition: RtypesCore.h:80
float Float_t
Definition: RtypesCore.h:57
const Bool_t kTRUE
Definition: RtypesCore.h:100
const char Option_t
Definition: RtypesCore.h:66
#define ClassImp(name)
Definition: Rtypes.h:364
char name[80]
Definition: TGX11.cxx:110
int nentries
Definition: THbookFile.cxx:91
Int_t gDebug
Definition: TROOT.cxx:592
char * Form(const char *fmt,...)
void Printf(const char *fmt,...)
Bool_t R_ISREG(Int_t mode)
Definition: TSystem.h:118
R__EXTERN TSystem * gSystem
Definition: TSystem.h:559
static struct mg_connection * fc(struct mg_context *ctx)
Definition: civetweb.c:3728
Collection abstract base class.
Definition: TCollection.h:65
virtual void Print(Option_t *option="") const
Default print for collections, calls Print(option, 1).
virtual Int_t GetEntries() const
Definition: TCollection.h:179
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
virtual Bool_t IsEmpty() const
Definition: TCollection.h:188
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:184
Class that contains a list of TFileInfo's and accumulated meta data information about its entries.
Bool_t AddMetaData(TObject *meta)
Adds a meta data object to the file collection object.
TFileCollection * GetStagedSubset()
Creates a subset of the files that have the kStaged & !kCorrupted bit set.
Int_t RemoveDuplicates()
Remove duplicates based on the UUID, typically after a verification.
Int_t Update(Long64_t avgsize=-1)
Update accumulated information about the elements of the collection (e.g.
THashList * GetList()
TFileCollection(const TFileCollection &)=delete
Long64_t GetTotalEntries(const char *tree) const
Returns the number of entries for the specified tree (retrieved from meta data).
void SetBitAll(UInt_t f)
Set the bit for all TFileInfos.
void Sort(Bool_t useindex=kFALSE)
Sort the collection.
void SetAnchor(const char *anchor)
Calls TUrl::SetAnchor() for all URLs contained in all TFileInfos.
void ResetBitAll(UInt_t f)
Reset the bit for all TFileInfos.
TMap * GetFilesPerServer(const char *exclude=0, Bool_t curronly=kFALSE)
Return a map of TFileCollections with the files on each data server, excluding servers in the comma-s...
Float_t GetStagedPercentage() const
void FormatSize(Long64_t bytes, TString &um, Double_t &size) const
Format size.
void SetDefaultMetaData(const char *meta)
Moves the indicated meta data in the first position, so that it becomes effectively the default.
virtual ~TFileCollection()
Cleanup.
Long64_t GetNFiles() const
void Print(Option_t *option="") const
Prints the contents of the TFileCollection.
void RemoveMetaData(const char *meta=0)
Removes the indicated meta data object in all TFileInfos and this object. If no name is given all meta...
TObjString * ExportInfo(const char *name=0, Int_t popt=0)
Export the relevant info as a string; use 'name' as collection name, if defined, else use GetName().
Long64_t GetTotalSize() const
Long64_t fNStagedFiles
Long64_t Merge(TCollection *list)
Merge all TFileCollection objects in the list into this TFileCollection object.
Int_t AddFromFile(const char *file, Int_t nfiles=-1, Int_t firstfile=1)
Add file names contained in the specified text file.
Long64_t fNCorruptFiles
TFileInfoMeta * GetMetaData(const char *meta=0) const
Returns the meta data object with the specified meta name.
const char * GetDefaultTreeName() const
Returns the tree set with SetDefaultTreeName, if set. Returns the name of the first tree in the meta da...
Float_t GetCorruptedPercentage() const
THashList * fList
Int_t Add(TFileInfo *info)
Add TFileInfo to the collection.
void PrintDetailed(TString &showOnly) const
Print detailed.
TFileCollection * GetFilesOnServer(const char *server)
Return the subset of files served by 'server'.
Bool_t IsTree() const
Definition: TFileInfo.h:143
Long64_t GetEntries() const
Definition: TFileInfo.h:140
void SetEntries(Long64_t entries)
Definition: TFileInfo.h:147
Class describing a generic file including meta information.
Definition: TFileInfo.h:39
Int_t GetIndex() const
Definition: TFileInfo.h:99
@ kSortWithIndex
Definition: TFileInfo.h:60
@ kCorrupted
Definition: TFileInfo.h:59
TUrl * NextUrl()
Iterator function, start iteration by calling ResetUrl().
Definition: TFileInfo.cxx:261
Bool_t AddMetaData(TObject *meta)
Adds a meta data object to the file info object.
Definition: TFileInfo.cxx:385
TList * GetMetaDataList() const
Definition: TFileInfo.h:83
Bool_t RemoveMetaData(const char *meta=0)
Remove the metadata object.
Definition: TFileInfo.cxx:402
Long64_t GetSize() const
Definition: TFileInfo.h:80
TUUID * GetUUID() const
Definition: TFileInfo.h:81
TMD5 * GetMD5() const
Definition: TFileInfo.h:82
Bool_t SetCurrentUrl(const char *url)
Set 'url' as current URL, if in the list. Return kFALSE if not in the list.
Definition: TFileInfo.cxx:355
TUrl * FindByUrl(const char *url, Bool_t withDeflt=kFALSE)
Find an element from a URL. Returns 0 if not found.
Definition: TFileInfo.cxx:277
void ResetUrl()
Definition: TFileInfo.h:69
void SetIndex(Int_t idx)
Definition: TFileInfo.h:100
TUrl * GetCurrentUrl() const
Return the current url.
Definition: TFileInfo.cxx:249
TFileInfoMeta * GetMetaData(const char *meta=0) const
Get meta data object with specified name.
Definition: TFileInfo.cxx:425
THashList implements a hybrid collection class consisting of a hash table and a list to store TObject...
Definition: THashList.h:34
TObject * FindObject(const char *name) const
Find object using its name.
Definition: THashList.cxx:262
TObject * Remove(TObject *obj)
Remove object from the list.
Definition: THashList.cxx:378
TObject * Next()
Definition: TCollection.h:251
A doubly linked list.
Definition: TList.h:44
virtual void Add(TObject *obj)
Definition: TList.h:87
virtual TObject * Remove(TObject *obj)
Remove object from the list.
Definition: TList.cxx:822
virtual void AddFirst(TObject *obj)
Add object at the beginning of the list.
Definition: TList.cxx:100
virtual TObject * FindObject(const char *name) const
Find an object in this list using its name.
Definition: TList.cxx:578
virtual void Clear(Option_t *option="")
Remove all objects from the list.
Definition: TList.cxx:402
virtual void Sort(Bool_t order=kSortAscending)
Sort linked list.
Definition: TList.cxx:937
const char * AsString() const
Return message digest as string.
Definition: TMD5.cxx:220
TMap implements an associative array of (key,value) pairs using a THashTable for efficient retrieval ...
Definition: TMap.h:40
void Add(TObject *obj)
This function may not be used (but we need to provide it since it is a pure virtual in TCollection).
Definition: TMap.cxx:54
TObject * GetValue(const char *keyname) const
Returns a pointer to the value associated with keyname as name of the key.
Definition: TMap.cxx:236
TObject * FindObject(const char *keyname) const
Check if a (key,value) pair exists with keyname as name of the key.
Definition: TMap.cxx:215
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:29
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:48
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Collectable string class.
Definition: TObjString.h:28
Mother of all ROOT objects.
Definition: TObject.h:37
R__ALWAYS_INLINE Bool_t TestBit(UInt_t f) const
Definition: TObject.h:187
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:879
void SetBit(UInt_t f, Bool_t set)
Set or unset the user status bits as specified in f.
Definition: TObject.cxx:696
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:893
void ResetBit(UInt_t f)
Definition: TObject.h:186
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:867
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Definition: TPRegexp.h:97
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Definition: TPRegexp.cxx:706
Class used by TMap to store (key,value) pairs.
Definition: TMap.h:102
TObject * Value() const
Definition: TMap.h:121
Regular expression class.
Definition: TRegexp.h:31
Basic string class.
Definition: TString.h:136
Ssiz_t Length() const
Definition: TString.h:410
TString & Insert(Ssiz_t pos, const char *s)
Definition: TString.h:649
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:682
const char * Data() const
Definition: TString.h:369
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:692
void Resize(Ssiz_t n)
Resize the string. Truncate or add blanks as necessary.
Definition: TString.cxx:1120
@ kIgnoreCase
Definition: TString.h:268
Bool_t IsNull() const
Definition: TString.h:407
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2336
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2314
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:624
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:639
virtual Bool_t ExpandPathName(TString &path)
Expand a pathname getting rid of special shell characters like ~.
Definition: TSystem.cxx:1274
virtual void FreeDirectory(void *dirp)
Free a directory.
Definition: TSystem.cxx:846
virtual void * OpenDirectory(const char *name)
Open a directory. Returns 0 if directory does not exist.
Definition: TSystem.cxx:837
int GetPathInfo(const char *path, Long_t *id, Long_t *size, Long_t *flags, Long_t *modtime)
Get info about a file: id, size, flags, modification time.
Definition: TSystem.cxx:1398
virtual const char * GetDirEntry(void *dirp)
Get a directory entry. Returns 0 if no more entries.
Definition: TSystem.cxx:854
virtual const char * BaseName(const char *pathname)
Base name of a file name. Base name of /user/root is root.
Definition: TSystem.cxx:935
virtual TString GetDirName(const char *pathname)
Return the directory name in pathname.
Definition: TSystem.cxx:1032
const char * AsString() const
Return UUID as string. Copy string immediately since it will be reused.
Definition: TUUID.cxx:570
This class represents a RFC 3986 compatible URI.
Definition: TUri.h:35
const TString GetPort() const
Definition: TUri.h:86
Bool_t SetUri(const TString &uri)
const TString GetScheme() const
Definition: TUri.h:80
This class represents a WWW compatible URL.
Definition: TUrl.h:33
const char * GetUrl(Bool_t withDeflt=kFALSE) const
Return full URL.
Definition: TUrl.cxx:389
void SetAnchor(const char *anchor)
Definition: TUrl.h:86
const char * GetHostFQDN() const
Return fully qualified domain name of url host.
Definition: TUrl.cxx:471
const char * GetProtocol() const
Definition: TUrl.h:64
Int_t GetPort() const
Definition: TUrl.h:78
TLine * line
static constexpr double um
static constexpr double s
Definition: tree.py:1
Int_t fMode
Definition: TSystem.h:127
auto * m
Definition: textangle.C:8