Logo ROOT   master
Reference Guide
TDataSetManager.cxx
Go to the documentation of this file.
1 // @(#)root/base:$Id$
2 // Author: Jan Fiete Grosse-Oetringhaus, 04.06.07
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////////////
13 // //
14 // TDataSetManager //
15 // //
16 // This class contains functions to handle datasets in PROOF //
17 // It is the layer between TProofServ and the file system that stores //
18 // the datasets. //
19 // //
20 //////////////////////////////////////////////////////////////////////////
21 
22 
23 #include "TDataSetManager.h"
24 
25 #include "Riostream.h"
26 
27 #include "TEnv.h"
28 #include "TError.h"
29 #include "TFile.h"
30 #include "TFileCollection.h"
31 #include "TFileInfo.h"
32 #include "TFileStager.h"
33 #include "TMD5.h"
34 #include "THashList.h"
35 #include "TKey.h"
36 #include "TObjString.h"
37 #include "TParameter.h"
38 #include "TPRegexp.h"
39 #include "TRegexp.h"
40 #include "TSystem.h"
41 #include "TTree.h"
42 #include "TUrl.h"
43 #include "TVirtualMonitoring.h"
44 
45 // One Gigabyte
46 #define DSM_ONE_GB (1073741824)
47 
48 // Name for common datasets
51 
53 
54 ////////////////////////////////////////////////////////////////////////////////
55 ///
56 /// Main constructor
57 
58 TDataSetManager::TDataSetManager(const char *group, const char *user,
59  const char *options)
60  : fGroup(group),
61  fUser(user), fCommonUser(), fCommonGroup(),
62  fGroupQuota(), fGroupUsed(),
63  fUserUsed(), fNTouchedFiles(0), fNOpenedFiles(0),
64  fNDisappearedFiles(0), fMTimeGroupConfig(-1)
65 {
66  // Fill default group and user if none is given
67  if (fGroup.IsNull())
68  fGroup = "default";
69  if (fUser.IsNull()) {
70  fUser = "--nouser--";
71  // Get user logon name
73  if (pw) {
74  fUser = pw->fUser;
75  delete pw;
76  }
77  }
78 
82 
83  fCommonUser = "COMMON";
84  fCommonGroup = "COMMON";
85 
86  fNTouchedFiles = -1;
87  fNOpenedFiles = -1;
88  fNDisappearedFiles = -1;
89  fMTimeGroupConfig = -1;
90 
91  fAvgFileSize = 50000000; // Default 50 MB per file
92 
93  // Parse options
94  ParseInitOpts(options);
95 
96  if (!fUser.IsNull() && !fGroup.IsNull()) {
97 
98  // If not in sandbox, construct the base URI using session defaults
99  // (group, user) (syntax: /group/user/dsname[#[subdir/]objname])
101  fBase.SetUri(TString(Form("/%s/%s/", fGroup.Data(), fUser.Data())));
102 
103  }
104 
105  // List of dataset server mapping instructions
106  TString srvmaps(gEnv->GetValue("DataSet.SrvMaps",""));
107  TString srvmapsenv(gSystem->Getenv("DATASETSRVMAPS"));
108  if (!(srvmapsenv.IsNull())) {
109  if (srvmapsenv.BeginsWith("+")) {
110  if (!(srvmaps.IsNull())) srvmaps += ",";
111  srvmaps += srvmapsenv(1,srvmapsenv.Length());
112  } else {
113  srvmaps = srvmapsenv;
114  }
115  }
116  if (!(srvmaps.IsNull()) && !(fgDataSetSrvMaps = ParseDataSetSrvMaps(srvmaps)))
117  Warning("TDataSetManager", "problems parsing DataSet.SrvMaps input info (%s)"
118  " - ignoring", srvmaps.Data());
119 
120  // Read config file
121  ReadGroupConfig(gEnv->GetValue("Proof.GroupFile", ""));
122 }
123 
124 ////////////////////////////////////////////////////////////////////////////////
125 /// Destructor
126 
128 {
129  // Clear used space
133 }
134 
135 ////////////////////////////////////////////////////////////////////////////////
136 /// Parse the opts string and set the init bits accordingly
137 /// Available options:
138 /// Cq: set kCheckQuota
139 /// Ar: set kAllowRegister
140 /// Av: set kAllowVerify
141 /// Ti: set kTrustInfo
142 /// Sb: set kIsSandbox
143 /// Ca: set kUseCache or kDoNotUseCache
144 /// The opts string may also contain additional unrelated info: in such a case
145 /// the field delimited by the prefix "opt:" is analyzed, e.g. if opts is
146 /// "/tmp/dataset opt:Cq:-Ar: root://lxb6046.cern.ch" only the substring
147 /// "Cq:-Ar:" will be parsed .
148 
149 void TDataSetManager::ParseInitOpts(const char *opts)
150 {
151  // Default option bits
159 
160  if (opts && strlen(opts) > 0) {
161  TString opt(opts);
162  // If it contains the prefix "opt:", isolate the related field
163  Int_t ip = opt.Index("opt:");
164  if (ip != kNPOS) opt.Remove(0, ip + 4);
165  ip = opt.Index(" ");
166  if (ip != kNPOS) opt.Remove(ip);
167  // Check the content, now
168  if (opt.Contains("Cq:") && !opt.Contains("-Cq:"))
170  if (opt.Contains("-Ar:"))
172  if (opt.Contains("-Av:"))
174  if (opt.Contains("-Ti:"))
176  if (opt.Contains("Sb:") && !opt.Contains("-Sb:"))
178  if (opt.Contains("Ca:"))
180  if (opt.Contains("-Ca:"))
182  }
183 
184  // Check dependencies
186  // Dataset verification or requires registration permition
188  }
189  // UseCache has priority
192 }
193 
194 ////////////////////////////////////////////////////////////////////////////////
195 /// Read group config file 'cf'.
196 /// If cf == 0 re-read, if changed, the file pointed by fGroupConfigFile .
197 ///
198 /// expects the following directives:
199 /// Group definition:
200 /// group <groupname> <user>+
201 /// disk quota
202 /// property <groupname> diskquota <quota in GB>
203 /// average filesize (to be used when the file size is not available)
204 /// averagefilesize <average size>{G,g,M,m,K,k}
205 
207 {
208  // Validate input
209  FileStat_t st;
210  if (!cf || (strlen(cf) <= 0) || !strcmp(cf, fGroupConfigFile.Data())) {
211  // If this is the first time we cannot do anything
212  if (fGroupConfigFile.IsNull()) {
213  if (gDebug > 0)
214  Info("ReadGroupConfig", "path to config file undefined - nothing to do");
215  return kFALSE;
216  }
217  // Check if fGroupConfigFile has changed
219  Error("ReadGroupConfig", "could not stat %s", fGroupConfigFile.Data());
220  return kFALSE;
221  }
222  if (st.fMtime <= fMTimeGroupConfig) {
223  if (gDebug > 0)
224  Info("ReadGroupConfig","file has not changed - do nothing");
225  return kTRUE;
226  }
227  }
228 
229  // Either new file or the file has changed
230  if (cf && (strlen(cf) > 0)) {
231  // The file must exist and be readable
232  if (gSystem->GetPathInfo(cf, st)) {
233  Error("ReadGroupConfig", "could not stat %s", cf);
234  return kFALSE;
235  }
237  Error("ReadGroupConfig", "cannot read %s", cf);
238  return kFALSE;
239  }
240  // Ok
241  fGroupConfigFile = cf;
243  }
244 
245  if (gDebug > 0)
246  Info("ReadGroupConfig","reading group config from %s", cf);
247 
248  // Open the config file
249  std::ifstream in;
250  in.open(cf);
251  if (!in.is_open()) {
252  Error("ReadGroupConfig", "could not open config file %s", cf);
253  return kFALSE;
254  }
255 
256  // Container for the global common user
257  TString tmpCommonUser;
258 
259  // Go through
260  TString line;
261  while (in.good()) {
262  // Read new line
263  line.ReadLine(in);
264  // Explicitely skip comment lines
265  if (line[0] == '#') continue;
266  // Parse it
267  Ssiz_t from = 0;
268  TString key;
269  if (!line.Tokenize(key, from, " ")) // No token
270  continue;
271  // Parsing depends on the key
272  if (key == "property") {
273  // Read group
274  TString grp;
275  if (!line.Tokenize(grp, from, " ")) {// No token
276  if (gDebug > 0)
277  Info("ReadGroupConfig","incomplete line: '%s'", line.Data());
278  continue;
279  }
280  // Read type of property
281  TString type;
282  if (!line.Tokenize(type, from, " ")) // No token
283  continue;
284  if (type == "diskquota") {
285  // Read diskquota
286  TString sdq;
287  if (!line.Tokenize(sdq, from, " ")) // No token
288  continue;
289  // Enforce GigaBytes as default
290  if (sdq.IsDigit()) sdq += "G";
291  Long64_t quota = ToBytes(sdq);
292  if (quota > -1) {
293  fGroupQuota.Add(new TObjString(grp),
294  new TParameter<Long64_t> ("group quota", quota));
295  } else {
296  Warning("ReadGroupConfig",
297  "problems parsing string: wrong or unsupported suffix? %s",
298  sdq.Data());
299  }
300  } else if (type == "commonuser") {
301  // Read common user for this group
302  TString comusr;
303  if (!line.Tokenize(comusr, from, " ")) // No token
304  continue;
305 
306  }
307 
308  } else if (key == "dataset") {
309  // Read type
310  TString type;
311  if (!line.Tokenize(type, from, " ")) {// No token
312  if (gDebug > 0)
313  Info("ReadGroupConfig","incomplete line: '%s'", line.Data());
314  continue;
315  }
316  if (type == "commonuser") {
317  // Read global common user
318  TString comusr;
319  if (!line.Tokenize(comusr, from, " ")) // No token
320  continue;
321  fCommonUser = comusr;
322  } else if (type == "commongroup") {
323  // Read global common group
324  TString comgrp;
325  if (!line.Tokenize(comgrp, from, " ")) // No token
326  continue;
327  fCommonGroup = comgrp;
328  } else if (type == "diskquota") {
329  // Quota check switch
330  TString on;
331  if (!line.Tokenize(on, from, " ")) // No token
332  continue;
333  if (on == "on") {
335  } else if (on == "off") {
337  }
338  }
339 
340  } else if (key == "averagefilesize") {
341 
342  // Read average size
343  TString avgsize;
344  if (!line.Tokenize(avgsize, from, " ")) {// No token
345  if (gDebug > 0)
346  Info("ReadGroupConfig","incomplete line: '%s'", line.Data());
347  continue;
348  }
349  Long64_t avgsz = ToBytes(avgsize);
350  if (avgsz > -1) {
351  fAvgFileSize = avgsz;
352  } else {
353  Warning("ReadGroupConfig",
354  "problems parsing string: wrong or unsupported suffix? %s",
355  avgsize.Data());
356  }
357  } else if (key == "include") {
358 
359  // Read file to include
360  TString subfn;
361  if (!line.Tokenize(subfn, from, " ")) {// No token
362  if (gDebug > 0)
363  Info("ReadGroupConfig","incomplete line: '%s'", line.Data());
364  continue;
365  }
366  // The file must be readable
367  if (gSystem->AccessPathName(subfn, kReadPermission)) {
368  Error("ReadGroupConfig", "request to parse file '%s' which is not readable",
369  subfn.Data());
370  continue;
371  }
372  if (!ReadGroupConfig(subfn))
373  Error("ReadGroupConfig", "problems parsing include file '%s'", subfn.Data());
374  }
375  }
376  in.close();
377 
378  return kTRUE;
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////////
382 /// Static utility function to get the number of bytes from a string
383 /// representation in the form "<digit><sfx>" with <sfx> = {"", "k", "M", "G",
384 /// "T", "P"} (case insensitive).
385 /// Returns -1 if the format is wrong.
386 
388 {
389  Long64_t lsize = -1;
390 
391  // Check if valid
392  if (!size || strlen(size) <= 0) return lsize;
393 
394  TString s(size);
395  // Determine factor
396  Long64_t fact = 1;
397  if (!s.IsDigit()) {
398  const char *unit[5] = { "k", "M", "G", "T", "P"};
399  fact = 1024;
400  Int_t jj = 0;
401  while (jj <= 4) {
402  if (s.EndsWith(unit[jj], TString::kIgnoreCase)) {
403  s.Remove(s.Length()-1);
404  break;
405  }
406  fact *= 1024;
407  jj++;
408  }
409  }
410  // Apply factor now
411  if (s.IsDigit())
412  lsize = s.Atoi() * fact;
413 
414  // Done
415  return lsize;
416 }
417 
418 ////////////////////////////////////////////////////////////////////////////////
419 /// Utility function used in various methods for user dataset upload.
420 
421 TFileCollection *TDataSetManager::GetDataSet(const char *, const char *)
422 {
423  AbstractMethod("GetDataSet");
424  return (TFileCollection *)0;
425 }
426 
427 ////////////////////////////////////////////////////////////////////////////////
428 /// Removes the indicated dataset
429 
431 {
432  AbstractMethod("RemoveDataSet");
433  return kFALSE;
434 }
435 
436 ////////////////////////////////////////////////////////////////////////////////
437 /// Checks if the indicated dataset exists
438 
440 {
441  AbstractMethod("ExistsDataSet");
442  return kFALSE;
443 }
444 
445 ////////////////////////////////////////////////////////////////////////////////
446 ///
447 /// Returns all datasets for the <group> and <user> specified by <uri>.
448 /// If <user> is 0, it returns all datasets for the given <group>.
449 /// If <group> is 0, it returns all datasets.
450 /// The returned TMap contains:
451 /// <group> --> <map of users> --> <map of datasets> --> <dataset> (TFileCollection)
452 ///
453 /// The unsigned int 'option' is forwarded to GetDataSet and BrowseDataSet.
454 /// Available options (to be .or.ed):
455 /// kShowDefault a default selection is shown that include the ones from
456 /// the current user, the ones from the group and the common ones
457 /// kPrint print the dataset content
458 /// kQuotaUpdate update quotas
459 /// kExport use export naming
460 ///
461 /// NB1: options "kPrint", "kQuotaUpdate" and "kExport" are mutually exclusive
462 /// NB2: for options "kPrint" and "kQuotaUpdate" return is null.
463 
465 {
466  AbstractMethod("GetDataSets");
467 
468  return (TMap *)0;
469 }
470 ////////////////////////////////////////////////////////////////////////////////
471 /// Scans the dataset indicated by 'uri' following the 'opts' directives
472 ///
473 /// The 'opts' string contains up to 4 directive fields separated by ':'
474 ///
475 /// 'selection' field :
476 /// A, allfiles: process all files
477 /// D, staged: process only staged (on Disk) files (if 'allfiles:' is not specified
478 /// the default is to process only files marked as non-staged)
479 /// 'pre-action field':
480 /// O, open: open the files marked as staged when processing only files
481 /// marked as non-staged
482 /// T, touch: open and touch the files marked as staged when processing
483 /// only files marked as non-staged
484 /// I, nostagedcheck: do not check the actual stage status on selected files
485 ///
486 /// 'process' field:
487 /// N, noaction: do nothing on the selected files
488 /// P, fullproc: open the selected files and extract the meta information
489 /// L, locateonly: only locate the selected files
490 /// S, stageonly: issue a stage request for the selected files not yet staged
491 ///
492 /// 'auxiliary' field
493 /// V, verbose: notify the actions
494 ///
495 /// Returns 0 on success, -1 if any failure occurs.
496 
497 Int_t TDataSetManager::ScanDataSet(const char *uri, const char *opts)
498 {
499  // Extract the directives
500  UInt_t o = 0;
501  if (opts && strlen(opts) > 0) {
502  // Selection options
503  if (strstr(opts, "allfiles:") || strchr(opts, 'A'))
504  o |= kAllFiles;
505  else if (strstr(opts, "staged:") || strchr(opts, 'D'))
506  o |= kStagedFiles;
507  // Pre-action options
508  if (strstr(opts, "open:") || strchr(opts, 'O'))
509  o |= kReopen;
510  if (strstr(opts, "touch:") || strchr(opts, 'T'))
511  o |= kTouch;
512  if (strstr(opts, "nostagedcheck:") || strchr(opts, 'I'))
513  o |= kNoStagedCheck;
514  // Process options
515  if (strstr(opts, "noaction:") || strchr(opts, 'N'))
516  o |= kNoAction;
517  if (strstr(opts, "locateonly:") || strchr(opts, 'L'))
518  o |= kLocateOnly;
519  if (strstr(opts, "stageonly:") || strchr(opts, 'S'))
520  o |= kStageOnly;
521  // Auxilliary options
522  if (strstr(opts, "verbose:") || strchr(opts, 'V'))
523  o |= kDebug;
524  } else {
525  // Default
526  o = kReopen | kDebug;
527  }
528 
529  // Run
530  return ScanDataSet(uri, o);
531 }
532 
533 ////////////////////////////////////////////////////////////////////////////////
534 /// Scans the dataset indicated by <uri> and returns the number of missing files.
535 /// Returns -1 if any failure occurs.
536 /// For more details, see documentation of
537 /// ScanDataSet(TFileCollection *dataset, const char *option)
538 
540 {
541  AbstractMethod("ScanDataSet");
542 
543  return -1;
544 }
545 
546 ////////////////////////////////////////////////////////////////////////////////
547 ///
548 /// Gets quota information from this dataset
549 
550 void TDataSetManager::GetQuota(const char *group, const char *user,
551  const char *dsName, TFileCollection *dataset)
552 {
553  if (gDebug > 0)
554  Info("GetQuota", "processing dataset %s %s %s", group, user, dsName);
555 
556  if (dataset->GetTotalSize() > 0) {
557  TParameter<Long64_t> *size =
558  dynamic_cast<TParameter<Long64_t>*> (fGroupUsed.GetValue(group));
559  if (!size) {
560  size = new TParameter<Long64_t> ("group used", 0);
561  fGroupUsed.Add(new TObjString(group), size);
562  }
563 
564  size->SetVal(size->GetVal() + dataset->GetTotalSize());
565 
566  TMap *userMap = dynamic_cast<TMap*> (fUserUsed.GetValue(group));
567  if (!userMap) {
568  userMap = new TMap;
569  fUserUsed.Add(new TObjString(group), userMap);
570  }
571 
572  size = dynamic_cast<TParameter<Long64_t>*> (userMap->GetValue(user));
573  if (!size) {
574  size = new TParameter<Long64_t> ("user used", 0);
575  userMap->Add(new TObjString(user), size);
576  }
577 
578  size->SetVal(size->GetVal() + dataset->GetTotalSize());
579  }
580 }
581 
582 ////////////////////////////////////////////////////////////////////////////////
583 /// Display quota information
584 
585 void TDataSetManager::ShowQuota(const char *opt)
586 {
587  UpdateUsedSpace();
588 
589  TMap *groupQuotaMap = GetGroupQuotaMap();
590  TMap *userUsedMap = GetUserUsedMap();
591  if (!groupQuotaMap || !userUsedMap)
592  return;
593 
594  Bool_t noInfo = kTRUE;
595  TIter iter(groupQuotaMap);
596  TObjString *group = 0;
597  while ((group = dynamic_cast<TObjString*> (iter.Next()))) {
598  noInfo = kFALSE;
599  Long64_t groupQuota = GetGroupQuota(group->String());
600  Long64_t groupUsed = GetGroupUsed(group->String());
601 
602  Printf(" +++ Group %s uses %.1f GB out of %.1f GB", group->String().Data(),
603  (Float_t) groupUsed / DSM_ONE_GB,
604  (Float_t) groupQuota / DSM_ONE_GB);
605 
606  // display also user information
607  if (opt && !TString(opt).Contains("U", TString::kIgnoreCase))
608  continue;
609 
610  TMap *userMap = dynamic_cast<TMap*> (userUsedMap->GetValue(group->String()));
611  if (!userMap)
612  continue;
613 
614  TIter iter2(userMap);
615  TObjString *user = 0;
616  while ((user = dynamic_cast<TObjString*> (iter2.Next()))) {
617  TParameter<Long64_t> *size2 =
618  dynamic_cast<TParameter<Long64_t>*> (userMap->GetValue(user->String().Data()));
619  if (!size2)
620  continue;
621 
622  Printf(" +++ User %s uses %.1f GB", user->String().Data(),
623  (Float_t) size2->GetVal() / DSM_ONE_GB);
624  }
625 
626  Printf("------------------------------------------------------");
627  }
628  // Check if something has been printed
629  if (noInfo) {
630  Printf(" +++ Quota check enabled but no quota info available +++ ");
631  }
632 }
633 
634 ////////////////////////////////////////////////////////////////////////////////
635 ///
636 /// Prints the quota
637 
639 {
640  Info("PrintUsedSpace", "listing used space");
641 
642  TIter iter(&fUserUsed);
643  TObjString *group = 0;
644  while ((group = dynamic_cast<TObjString*> (iter.Next()))) {
645  TMap *userMap = dynamic_cast<TMap*> (fUserUsed.GetValue(group->String()));
646 
647  TParameter<Long64_t> *size =
648  dynamic_cast<TParameter<Long64_t>*> (fGroupUsed.GetValue(group->String()));
649 
650  if (userMap && size) {
651  Printf("Group %s: %lld B = %.2f GB", group->String().Data(), size->GetVal(),
652  (Float_t) size->GetVal() / DSM_ONE_GB);
653 
654  TIter iter2(userMap);
655  TObjString *user = 0;
656  while ((user = dynamic_cast<TObjString*> (iter2.Next()))) {
657  TParameter<Long64_t> *size2 =
658  dynamic_cast<TParameter<Long64_t>*> (userMap->GetValue(user->String().Data()));
659  if (size2)
660  Printf(" User %s: %lld B = %.2f GB", user->String().Data(), size2->GetVal(),
661  (Float_t) size2->GetVal() / DSM_ONE_GB);
662  }
663 
664  Printf("------------------------------------------------------");
665  }
666  }
667 }
668 
669 ////////////////////////////////////////////////////////////////////////////////
670 ///
671 /// Log info to the monitoring server
672 
674 {
675  Info("MonitorUsedSpace", "sending used space to monitoring server");
676 
677  TIter iter(&fUserUsed);
678  TObjString *group = 0;
679  while ((group = dynamic_cast<TObjString*> (iter.Next()))) {
680  TMap *userMap = dynamic_cast<TMap*> (fUserUsed.GetValue(group->String()));
681  TParameter<Long64_t> *size =
682  dynamic_cast<TParameter<Long64_t>*> (fGroupUsed.GetValue(group->String()));
683 
684  if (!userMap || !size)
685  continue;
686 
687  TList *list = new TList;
688  list->SetOwner();
689  list->Add(new TParameter<Long64_t>("_TOTAL_", size->GetVal()));
690  Long64_t groupQuota = GetGroupQuota(group->String());
691  if (groupQuota != -1)
692  list->Add(new TParameter<Long64_t>("_QUOTA_", groupQuota));
693 
694  TIter iter2(userMap);
695  TObjString *user = 0;
696  while ((user = dynamic_cast<TObjString*> (iter2.Next()))) {
697  TParameter<Long64_t> *size2 =
698  dynamic_cast<TParameter<Long64_t>*> (userMap->GetValue(user->String().Data()));
699  if (!size2)
700  continue;
701  list->Add(new TParameter<Long64_t>(user->String().Data(), size2->GetVal()));
702  }
703 
704  if (!monitoring->SendParameters(list, group->String()))
705  Warning("MonitorUsedSpace", "problems sending monitoring parameters");
706  delete list;
707  }
708 }
709 
710 ////////////////////////////////////////////////////////////////////////////////
711 ///
712 /// Returns the used space of that group
713 
715 {
716  if (fgCommonDataSetTag == group)
718 
719  TParameter<Long64_t> *size =
720  dynamic_cast<TParameter<Long64_t>*> (fGroupUsed.GetValue(group));
721  if (!size) {
722  if (gDebug > 0)
723  Info("GetGroupUsed", "group %s not found", group);
724  return 0;
725  }
726 
727  return size->GetVal();
728 }
729 
730 ////////////////////////////////////////////////////////////////////////////////
731 ///
732 /// returns the quota a group is allowed to have
733 
735 {
736  if (fgCommonDataSetTag == group)
738 
739  TParameter<Long64_t> *value =
741  if (!value) {
742  if (gDebug > 0)
743  Info("GetGroupQuota", "group %s not found", group);
744  return 0;
745  }
746  return value->GetVal();
747 }
748 
749 ////////////////////////////////////////////////////////////////////////////////
750 /// updates the used space maps
751 
753 {
754  AbstractMethod("UpdateUsedSpace");
755 }
756 
757 ////////////////////////////////////////////////////////////////////////////////
758 /// Register a dataset, performing quota checks, if needed.
759 /// Returns 0 on success, -1 on failure
760 
762  TFileCollection *, const char *)
763 {
764  AbstractMethod("RegisterDataSet");
765  return -1;
766 }
767 
768 ////////////////////////////////////////////////////////////////////////////////
769 /// Save into the <datasetdir>/dataset.list file the name of the last updated
770 /// or created or modified dataset
771 /// Returns 0 on success, -1 on error
772 
773 Int_t TDataSetManager::NotifyUpdate(const char * /*group*/,
774  const char * /*user*/,
775  const char * /*dspath*/,
776  Long_t /*mtime*/,
777  const char * /*checksum*/)
778 {
779  AbstractMethod("NotifyUpdate");
780  return -1;
781 }
782 
783 ////////////////////////////////////////////////////////////////////////////////
784 /// Clear cached information matching uri
785 
786 Int_t TDataSetManager::ClearCache(const char * /*uri*/)
787 {
788  AbstractMethod("ClearCache");
789  return -1;
790 }
791 
792 ////////////////////////////////////////////////////////////////////////////////
793 /// Show cached information matching uri
794 
795 Int_t TDataSetManager::ShowCache(const char * /*uri*/)
796 {
797  AbstractMethod("ShowCache");
798  return -1;
799 }
800 
801 ////////////////////////////////////////////////////////////////////////////////
802 /// Creates URI for the dataset manager in the form '[[/dsGroup/]dsUser/]dsName[#dsObjPath]'.
803 /// The optional dsObjPath can be in the form '[subdir/]objname'.
804 
805 TString TDataSetManager::CreateUri(const char *dsGroup, const char *dsUser,
806  const char *dsName, const char *dsObjPath)
807 {
808  TString uri;
809 
810  if (dsGroup && strlen(dsGroup) > 0) {
811  if (dsUser && strlen(dsUser) > 0) {
812  uri += Form("/%s/%s/", dsGroup, dsUser);
813  } else {
814  uri += Form("/%s/*/", dsGroup);
815  }
816  } else if (dsUser && strlen(dsUser) > 0) {
817  uri += Form("%s/", dsUser);
818  }
819  if (dsName && strlen(dsName) > 0)
820  uri += dsName;
821  if (dsObjPath && strlen(dsObjPath) > 0)
822  uri += Form("#%s", dsObjPath);
823 
824  // Done
825  return uri;
826 }
827 
828 ////////////////////////////////////////////////////////////////////////////////
829 /// Parses a (relative) URI that describes a DataSet on the cluster.
830 /// The input 'uri' should be in the form '[[/group/]user/]dsname[#[subdir/]objname]',
831 /// where 'objname' is the name of the object (e.g. the tree name) and the 'subdir'
832 /// is the directory in the file where it should be looked for.
833 /// After resolving against a base URI consisting of proof://masterhost/group/user/
834 /// - meaning masterhost, group and user of the current session -
835 /// the path is checked to contain exactly three elements separated by '/':
836 /// group/user/dsname
837 /// If wildcards, '*' is allowed in group and user and dsname is allowed to be empty.
838 /// If onlyCurrent, only group and user of current session are allowed.
839 /// Only non-null parameters are filled by this function.
840 /// Returns kTRUE in case of success.
841 
843  TString *dsGroup, TString *dsUser,
844  TString *dsName, TString *dsTree,
845  Bool_t onlyCurrent, Bool_t wildcards)
846 {
847  TString uristr(uri);
848 
849  // If URI contains fields in the form "Field=Value;" it is a virtual URI and
850  // should be treated differently
851  if ((uristr.Index('=') >= 0) && (uristr.Index(';') >= 0)) {
852 
853  // URI is composed of two parts: a name (dsName), and the tree after the
854  // pound sign
855 
856  Warning("ParseUri",
857  "Dataset URI looks like a virtual URI, treating it as such. "
858  "No group and user will be parsed!");
859 
860  TPMERegexp reVirtualUri("^([^#]+)(#(.*))?$");
861  Int_t nm = reVirtualUri.Match(uristr);
862 
863  if (nm >= 2) {
864  if (dsGroup) *dsGroup = "";
865  if (dsUser) *dsUser = "";
866  if (dsName) *dsName = reVirtualUri[1];
867  if (dsTree) {
868  if (nm == 4) *dsTree = reVirtualUri[3];
869  else *dsTree = "";
870  }
871  }
872  else return kFALSE; // should never happen!
873 
874  return kTRUE;
875  }
876 
877  // Append trailing slash if missing when wildcards are enabled
878  Int_t pc = 0;
879  if (wildcards && uristr.Length() > 0) {
880  pc = uristr.CountChar('/');
881  Bool_t endsl = uristr.EndsWith("/") ? kTRUE : kFALSE;
882  Bool_t beginsl = uristr.BeginsWith("/") ? kTRUE : kFALSE;
883  if (beginsl) {
884  if (pc == 1) uristr += "/*/";
885  if (pc == 2 && endsl) uristr += "*/";
886  if (pc == 2 && !endsl) uristr += "/";
887  }
888  }
889 
890  // Resolve given URI agains the base
891  TUri resolved = TUri::Transform(uristr, fBase);
892  if (resolved.HasQuery())
893  Info ("ParseUri", "URI query part <%s> ignored", resolved.GetQuery().Data());
894 
895  TString path(resolved.GetPath());
896  // Must be in the form /group/user/dsname
897  if ((pc = path.CountChar('/')) != 3) {
899  Error ("ParseUri", "illegal dataset path: '%s'", uri);
900  return kFALSE;
901  } else if (pc >= 0 && pc < 3) {
902  // Add missing slashes
903  TString sls("/");
904  if (pc == 2) {
905  sls = "/";
906  } else if (pc == 1) {
907  sls.Form("/%s/", fGroup.Data());
908  } else if (pc == 0) {
909  sls.Form("/%s/%s/", fGroup.Data(), fUser.Data());
910  }
911  path.Insert(0, sls);
912  }
913  }
914  if (gDebug > 1)
915  Info("ParseUri", "path: '%s'", path.Data());
916 
917  // Get individual values from tokens
918  Int_t from = 1;
919  TString group, user, name;
920  if (path.Tokenize(group, from, "/")) {
921  if (path.Tokenize(user, from, "/")) {
922  if (!path.Tokenize(name, from, "/"))
923  if (gDebug > 0) Info("ParseUri", "'name' missing");
924  } else {
925  if (gDebug > 0) Info("ParseUri", "'user' missing");
926  }
927  } else {
928  if (gDebug > 1) Info("ParseUri", "'group' missing");
929  }
930 
931  // The fragment may contain the subdir and the object name in the form '[subdir/]objname'
932  TString tree = resolved.GetFragment();
933  if (tree.EndsWith("/"))
934  tree.Remove(tree.Length()-1);
935 
936  if (gDebug > 1)
937  Info("ParseUri", "group: '%s', user: '%s', dsname:'%s', seg: '%s'",
938  group.Data(), user.Data(), name.Data(), tree.Data());
939 
940  // Check for unwanted use of wildcards
941  if ((user == "*" || group == "*") && !wildcards) {
942  Error ("ParseUri", "no wildcards allowed for user/group in this context (uri: '%s')", uri);
943  return kFALSE;
944  }
945 
946  // dsname may only be empty if wildcards expected
947  if (name.IsNull() && !wildcards) {
948  Error ("ParseUri", "DataSet name is empty");
949  return kFALSE;
950  }
951 
952  // Construct regexp whitelist for checking illegal characters in user/group
953  TPRegexp wcExp (wildcards ? "^(?:[A-Za-z0-9-*_.]*|[*])$" : "^[A-Za-z0-9-_.]*$");
954 
955  // Check for illegal characters in all components
956  if (!wcExp.Match(group)) {
957  Error("ParseUri", "illegal characters in group (uri: '%s', group: '%s')", uri, group.Data());
958  return kFALSE;
959  }
960 
961  if (!wcExp.Match(user)) {
962  Error("ParseUri", "illegal characters in user (uri: '%s', user: '%s')", uri, user.Data());
963  return kFALSE;
964  }
965 
966  // Construct regexp whitelist for checking illegal characters in name
967  if (!wcExp.Match(name)) {
968  Error("ParseUri", "illegal characters in name (uri: '%s', name: '%s')", uri, name.Data());
969  return kFALSE;
970  }
971 
972  if (tree.Contains(TRegexp("[^A-Za-z0-9-/_]"))) {
973  Error("ParseUri", "Illegal characters in subdir/object name (uri: '%s', obj: '%s')", uri, tree.Data());
974  return kFALSE;
975  }
976 
977  // Check user & group
978  if (onlyCurrent && (group.CompareTo(fGroup) || user.CompareTo(fUser))) {
979  Error("ParseUri", "only datasets from your group/user allowed");
980  return kFALSE;
981  }
982 
983  // fill parameters passed by reference, if defined
984  if (dsGroup)
985  *dsGroup = group;
986  if (dsUser)
987  *dsUser = user;
988  if (dsName)
989  *dsName = name;
990  if (dsTree)
991  *dsTree = tree;
992 
993  return kTRUE;
994 }
995 
996 ////////////////////////////////////////////////////////////////////////////////
997 /// Partition dataset 'ds' accordingly to the servers.
998 /// The returned TMap contains:
999 /// <server> --> <subdataset> (TFileCollection)
1000 /// where <subdataset> is the subset of 'ds' on <server>
1001 /// The partitioning is done using all the URLs in the TFileInfo's, so the
1002 /// resulting datasets are not mutually exclusive.
1003 /// The string 'exclude' contains a comma-separated list of servers to exclude
1004 /// from the map.
1005 
1006 TMap *TDataSetManager::GetSubDataSets(const char *ds, const char *exclude)
1007 {
1008  TMap *map = (TMap *)0;
1009 
1010  if (!ds || strlen(ds) <= 0) {
1011  Info("GetDataSets", "dataset name undefined!");
1012  return map;
1013  }
1014 
1015  // Get the dataset
1016  TFileCollection *fc = GetDataSet(ds);
1017  if (!fc) {
1018  Info("GetDataSets", "could not retrieve the dataset '%s'", ds);
1019  return map;
1020  }
1021 
1022  // Get the subset
1023  if (!(map = fc->GetFilesPerServer(exclude))) {
1024  if (gDebug > 0)
1025  Info("GetDataSets", "could not get map for '%s'", ds);
1026  }
1027 
1028  // Cleanup
1029  delete fc;
1030 
1031  // Done
1032  return map;
1033 }
1034 
1035 ////////////////////////////////////////////////////////////////////////////////
1036 /// Formatted printout of the content of TFileCollection 'fc'.
1037 /// Options in the form
1038 /// popt = u * 10 + f
1039 /// f 0 => header only, 1 => header + files
1040 /// when printing files
1041 /// u 0 => print file name only, 1 => print full URL
1042 
1044 {
     // Prints a summary of 'fc' and, depending on 'popt', the list of its files.
     // Nothing is printed when no collection is given.
1045  if (!fc) return;
1046 
     // Decode the print option: 'f' selects header only (0) or header + files (1),
     // 'u' selects file name only or full URL.
     // NOTE(review): the header documents popt = u * 10 + f, which would give
     // u = (popt - f) / 10; for non-negative popt the expression below evaluates
     // to 1 only when popt == 11 - confirm that this is the intent.
1047  Int_t f = popt%10;
1048  Int_t u = popt - 10 * f;
1049 
     // Header: the title is preferred over the name when it is not empty
1050  Printf("+++");
1051  if (fc->GetTitle() && (strlen(fc->GetTitle()) > 0)) {
1052  Printf("+++ Dumping: %s: ", fc->GetTitle());
1053  } else {
1054  Printf("+++ Dumping: %s: ", fc->GetName());
1055  }
     // NOTE(review): the object returned by ExportInfo() is dereferenced without
     // a null check and is not deleted here - confirm ownership and nullability.
1056  Printf("%s", fc->ExportInfo("+++ Summary:", 1)->GetName());
1057  if (f == 1) {
     // File list, numbered starting from 1
1058  Printf("+++ Files:");
1059  Int_t nf = 0;
1060  TIter nxfi(fc->GetList());
1061  TFileInfo *fi = 0;
1062  while ((fi = (TFileInfo *)nxfi())) {
     // Full URL when u == 1, file part of the current URL otherwise
1063  if (u == 1)
1064  Printf("+++ %5d. %s", ++nf, fi->GetCurrentUrl()->GetUrl());
1065  else
1066  Printf("+++ %5d. %s", ++nf, fi->GetCurrentUrl()->GetFile());
1067  }
1068  }
1069  Printf("+++");
1070 }
1071 
1072 ////////////////////////////////////////////////////////////////////////////////
1073 /// Prints formatted information about the dataset 'uri'.
1074 /// The type and format of output is driven by 'opt':
1075 ///
1076 /// 1. opt = "server:srv1[,srv2[,srv3[,...]]]"
1077 /// Print info about the subsets of 'uri' on servers srv1, srv2, ...
1078 /// 2. opt = "servers[:exclude:srv1[,srv2[,srv3[,...]]]]"
1079 /// Print info about the subsets of 'uri' on all servers, except
1080 /// the ones in the exclude list srv1, srv2, ...
1081 /// 3. opt = <any>
1082 /// Print info about all datasets matching 'uri'
1083 ///
1084 /// If 'opt' contains 'full:' the list of files in the datasets are also printed.
1085 /// In case 3. this is enabled only if 'uri' matches a single dataset.
1086 ///
1087 /// In case 3, if 'opt' contains
1088 /// 'full:' the list of files in the datasets are also printed.
1089 /// 'forcescan:' the datasets are opened to get the information; otherwise the
1090 /// pre-processed information is used.
1091 /// 'noheader:' the labelling header is not printed; useful when chaining
1092 /// several printouts
1093 /// 'noupdate:' do not update the cache (which may be slow on very remote
1094 /// servers)
1095 /// 'refresh:' refresh the information (requires appropriate credentials;
1096 /// typically it can be done only for owned datasets)
1097 
1098 void TDataSetManager::ShowDataSets(const char *uri, const char *opt)
1099 {
1100  TFileCollection *fc = 0;
1101  TString o(opt);
1102  Int_t popt = 0;
     // 'full:' is removed from the option string and enables the file listing
1103  if (o.Contains("full:")) {
1104  o.ReplaceAll("full:","");
1105  popt = 1;
1106  }
1107  if (o.BeginsWith("server:")) {
     // Case 1: print the subset of 'uri' on each server of the comma-separated list
1108  o.ReplaceAll("server:", "");
1109  TString srv;
1110  Int_t from = 0;
1111  while ((o.Tokenize(srv, from, ","))) {
1112  fc = GetDataSet(uri, srv.Data());
1113  PrintDataSet(fc, popt);
1114  delete fc;
1115  }
1116  } else if (o.BeginsWith("servers")) {
     // Case 2: print the subsets on all servers; what follows ':exclude:' is passed
     // to GetSubDataSets as exclusion list, anything else is dropped
1117  o.ReplaceAll("servers", "");
1118  if (o.BeginsWith(":exclude:"))
1119  o.ReplaceAll(":exclude:", "");
1120  else
1121  o = "";
1122  TMap *dsmap = GetSubDataSets(uri, o.Data());
1123  if (dsmap) {
1124  TIter nxk(dsmap);
1125  TObject *k = 0;
     // The loop stops at the first key without an associated collection
1126  while ((k = nxk()) && (fc = (TFileCollection *) dsmap->GetValue(k))) {
1127  PrintDataSet(fc, popt);
1128  }
     // NOTE(review): only the map is deleted here; whether it owns its keys and
     // values is not visible in this function - confirm.
1129  delete dsmap;
1130  }
1131  } else {
     // Case 3: print the datasets matching 'uri'
1132  TString u(uri), grp, usr, dsn;
1133  // Support for "*" or "/*"
1134  if (u == "" || u == "*" || u == "/*" || u == "/*/" || u == "/*/*") u = "/*/*/";
     // A parsing failure is only reported; processing continues
1135  if (!ParseUri(u.Data(), &grp, &usr, &dsn, 0, kFALSE, kTRUE))
1136  Warning("ShowDataSets", "problems parsing URI '%s'", uri);
1137  // Scan the existing datasets and print the content
     // NOTE(review): 'xopt' is used below but its declaration is not visible in
     // this listing (the numbering jumps from 1137 to 1139) - check the real source.
1139  if (o.Contains("forcescan:")) xopt |= (UInt_t)(TDataSetManager::kForceScan);
1140  if (o.Contains("noheader:")) xopt |= (UInt_t)(TDataSetManager::kNoHeaderPrint);
1141  if (o.Contains("noupdate:")) xopt |= (UInt_t)(TDataSetManager::kNoCacheUpdate);
1142  if (o.Contains("refresh:")) xopt |= (UInt_t)(TDataSetManager::kRefreshLs);
     // Fully specified URI without wildcards: try it as a single dataset first
1143  if (!u.IsNull() && !u.Contains("*") && !grp.IsNull() && !usr.IsNull() && !dsn.IsNull()) {
1144  if (ExistsDataSet(uri)) {
1145  // Single dataset
1146  if (popt == 0) {
1147  // Quick listing
1148  GetDataSets(u.Data(), xopt);
1149  } else if ((fc = GetDataSet(uri))) {
1150  // Full print option
1151  PrintDataSet(fc, 10 + popt);
1152  delete fc;
1153  }
1154  return;
1155  }
1156  // Try all the directories
     // Group and user not found in the URI as typed are replaced by wildcards
1157  TRegexp reg(grp, kTRUE), reu(usr, kTRUE);
1158  if (u.Index(reg) == kNPOS) grp = "*";
1159  if (u.Index(reu) == kNPOS) usr = "*";
1160  // Rebuild the uri
1161  u.Form("/%s/%s/%s", grp.Data(), usr.Data(), dsn.Data());
1162  }
1163  GetDataSets(u.Data(), xopt);
1164  }
1165 
1166  return;
1167 }
1168 
1169 ////////////////////////////////////////////////////////////////////////////////
1170 /// Go through the files in the specified dataset, selecting files according to
1171 /// 'fopt' and doing on these files the actions described by 'sopt'.
1172 /// If required, the information in 'dataset' is updated.
1173 ///
1174 /// The int fopt controls which files have to be processed (or added to the list
1175 /// if ropt is 1 - see below); 'fopt' is defined in term of csopt and fsopt:
1176 /// fopt = sign(fsopt) * csopt * 100 + fsopt
1177 /// where 'fsopt' controls the actual selection
1178 /// -1 all files in the dataset
1179 /// 0 process only files marked as 'non-staged'
1180 /// >=1 as 0 but files that are marked 'staged' are open
1181 /// >=2 as 1 but files that are marked 'staged' are touched
1182 /// 10 process only files marked as 'staged'; files marked as 'non-staged'
1183 /// are ignored
1184 /// and 'csopt' controls if an actual check on the staged status (via TFileStager) is done
1185 /// 0 check that the file is staged using TFileStager
1186 /// 1 do not hard check the staged status
1187 /// (example: use fopt = -101 to check the staged status of all the files, or fopt = 110
1188 /// to re-check the stage status of all the files marked as staged)
1189 ///
1190 /// If 'dbg' is true, some information about the ongoing operations is regularly
1191 /// printed; this can be useful when processing very large datasets, an operation
1192 /// which can take a very long time.
1193 ///
1194 /// The int 'sopt' controls what is done on the selected files (this is effective only
1195 /// if ropt is 0 or 2 - see below):
1196 /// -1 no action (fopt = 2 and sopt = -1 touches all staged files)
1197 /// 0 do the full process: open the files and fill the meta-information
1198 /// in the TFileInfo object, including the end-point URL
1199 /// 1 only locate the files, by updating the end-point URL (uses TFileStager::Locate
1200 /// which is much faster than a TFile::Open)
1201 /// 2 issue a stage request on the files
1202 ///
1203 /// The int 'ropt' controls which actions are performed:
1204 /// 0 do the full process: get list of files to process and process them
1205 /// 1 get the list of files to be scanned and return it in flist
1206 /// 2 process the files in flist (according to sopt)
1207 /// When defined, flist is under the responsibility of the caller.
1208 ///
1209 /// If avgsz > 0 it is used for the final update of the dataset global counters.
1210 ///
1211 /// If 'mss' is defined use it to initialize the stager (instead of the Url in the
1212 /// TFileInfo objects)
1213 ///
1214 /// If maxfiles > 0, select for processing a maximum of 'maxfiles' files (but if fopt is 1 or 2
1215 /// all files marked as 'staged' are still open or touched)
1216 ///
1217 /// Return code
1218 /// 1 dataset was not changed
1219 /// 2 dataset was changed
1220 ///
1221 /// The number of touched, opened and disappeared files are returned in the respective
1222 /// variables, if these are defined.
1223 
1225  Int_t fopt, Int_t sopt, Int_t ropt, Bool_t dbg,
1226  Int_t *touched, Int_t *opened, Int_t *disappeared,
1227  TList *flist, Long64_t avgsz, const char *mss,
1228  Int_t maxfiles, const char *stageopts)
1229 {
     // NOTE(review): the first line of the signature (return type, name and the
     // 'dataset' argument) is not visible in this listing.
     // The scan runs in two phases: phase 1 (ropt 0 or 1) selects the files via
     // CheckStagedStatus, phase 2 (ropt 0 or 2) processes them via ProcessFile.
1230  // Max number of files
1231  if (maxfiles > -1 && dbg)
1232  ::Info("TDataSetManager::ScanDataSet", "processing a maximum of %d files", maxfiles);
1233 
1234  // File selection, Reopen and Touch options
     // The hard check of the staged status is disabled for fopt >= 100 or fopt < -1
1235  Bool_t checkstg = (fopt >= 100 || fopt < -1) ? kFALSE : kTRUE;
1236 
1237  // File processing options
1238  Bool_t noaction = (sopt == -1) ? kTRUE : kFALSE;
1239  //Bool_t fullproc = (sopt == 0) ? kTRUE : kFALSE;
1240  Bool_t locateonly = (sopt == 1) ? kTRUE : kFALSE;
1241  Bool_t stageonly = (sopt == 2) ? kTRUE : kFALSE;
1242 
1243  // Run options
1244  Bool_t doall = (ropt == 0) ? kTRUE : kFALSE;
1245  Bool_t getlistonly = (ropt == 1) ? kTRUE : kFALSE;
1246  Bool_t scanlist = (ropt == 2) ? kTRUE : kFALSE;
1247 
     // NOTE(review): -1 is returned here although the header documents only the
     // return codes 1 and 2.
1248  if (scanlist && !flist) {
1249  ::Error("TDataSetManager::ScanDataSet", "input list is mandatory for option 'scan file list'");
1250  return -1;
1251  }
1252 
1253  Int_t ftouched = 0;
1254  Int_t fopened = 0;
1255  Int_t fdisappeared = 0;
1256 
1257  Bool_t bchanged_ds = kFALSE;
1258 
1259  TList *newStagedFiles = 0;
1260  TFileInfo *fileInfo = 0;
1261  TFileStager *stager = 0;
1262  Bool_t createStager = kFALSE;
1263 
1264  if (doall || getlistonly) {
1265 
1266  // Point to the list
     // The caller's list is filled directly when only the list is requested;
     // otherwise a local, non-owning list is created
1267  newStagedFiles = (!doall && getlistonly && flist) ? flist : new TList;
1268  if (newStagedFiles != flist) newStagedFiles->SetOwner(kFALSE);
1269 
     // A stager is opened on 'mss' when defined; otherwise CheckStagedStatus is
     // asked to create one per file
1270  stager = (mss && strlen(mss) > 0) ? TFileStager::Open(mss) : 0;
1271  createStager = (stager) ? kFALSE : kTRUE;
1272 
1273  Bool_t bchanged_fi = kFALSE;
1274  Bool_t btouched = kFALSE;
1275  Bool_t bdisappeared = kFALSE;
1276 
1277  // Check which files have been staged, this can be replaced by a bulk command,
1278  // once it exists in the xrdclient
1279  TIter iter(dataset->GetList());
1280  while ((fileInfo = (TFileInfo *) iter())) {
1281 
1282  // For real time monitoring
     // NOTE(review): no statement follows this comment in the listing (line 1283
     // is absent).
1284 
1285  bchanged_fi = kFALSE;
1286  btouched = kFALSE;
1287  bdisappeared = kFALSE;
1288  Bool_t newlystaged = CheckStagedStatus(fileInfo, fopt, maxfiles, newStagedFiles->GetEntries(),
1289  stager, createStager, dbg, bchanged_fi, btouched,
1290  bdisappeared);
1291 
1292  if (bchanged_fi) bchanged_ds = kTRUE;
1293  if (btouched) ftouched++;
1294  if (bdisappeared) fdisappeared++;
1295 
1296  // Notify
     // NOTE(review): the condition is also true while both counters are still 0,
     // so the message is printed for every file until one is touched or disappears.
1297  if (dbg && (ftouched+fdisappeared) % 100 == 0)
1298  ::Info("TDataSetManager::ScanDataSet", "opening %d: file: %s",
1299  ftouched + fdisappeared, fileInfo->GetCurrentUrl()->GetUrl());
1300 
1301  // Register the newly staged file
1302  if (!noaction && newlystaged) newStagedFiles->Add(fileInfo);
1303  }
1304  SafeDelete(stager);
1305 
1306  // If required to only get the list we are done
1307  if (getlistonly) {
1308  if (dbg && newStagedFiles->GetEntries() > 0)
1309  ::Info("TDataSetManager::ScanDataSet", " %d files appear to be newly staged",
1310  newStagedFiles->GetEntries());
     // The local list is dropped when the caller did not provide one
1311  if (!flist) SafeDelete(newStagedFiles);
1312  return ((bchanged_ds) ? 2 : 1);
1313  }
1314  }
1315 
1316  if (!noaction && (doall || scanlist)) {
1317 
1318  // Point to the list
     // With ropt == 2 the files come from the caller's list; otherwise from phase 1
1319  newStagedFiles = (!doall && scanlist && flist) ? flist : newStagedFiles;
1320  if (newStagedFiles != flist) newStagedFiles->SetOwner(kFALSE);
1321 
1322  // loop over now staged files
1323  if (dbg && newStagedFiles->GetEntries() > 0)
1324  ::Info("TDataSetManager::ScanDataSet", "opening %d files that appear to be newly staged",
1325  newStagedFiles->GetEntries());
1326 
1327  // If staging files, prepare the stager
     // NOTE(review): no delete of the stager opened here is visible in the rest
     // of this function - confirm that it is released elsewhere.
1328  if (locateonly || stageonly) {
1329  stager = (mss && strlen(mss) > 0) ? TFileStager::Open(mss) : 0;
1330  createStager = (stager) ? kFALSE : kTRUE;
1331  }
1332 
1333  // Notify each 'fqnot' files (min 1, max 100)
1334  Int_t fqnot = (newStagedFiles->GetSize() > 10) ? newStagedFiles->GetSize() / 10 : 1;
1335  if (fqnot > 100) fqnot = 100;
1336  Int_t count = 0;
1337  Bool_t bchanged_fi = kFALSE;
1338  Bool_t bopened = kFALSE;
1339  TIter iter(newStagedFiles);
1340  while ((fileInfo = (TFileInfo *) iter())) {
1341 
1342  if (dbg && (count%fqnot == 0))
1343  ::Info("TDataSetManager::ScanDataSet", "processing %d.'new' file: %s",
1344  count, fileInfo->GetCurrentUrl()->GetUrl());
1345  count++;
1346 
1347  // For real time monitoring
     // NOTE(review): no statement follows this comment in the listing (line 1348
     // is absent).
1349  bchanged_fi = kFALSE;
1350  bopened = kFALSE;
1351 
1352  ProcessFile(fileInfo, sopt, checkstg, doall, stager, createStager,
1353  stageopts, dbg, bchanged_fi, bopened);
1354 
1355  bchanged_ds |= bchanged_fi;
1356  if (bopened) fopened++;
1357  }
     // Only a list created here is deleted; the caller's list is left untouched
1358  if (newStagedFiles != flist) SafeDelete(newStagedFiles);
1359 
     // Refresh the dataset after processing
1360  dataset->RemoveDuplicates();
1361  dataset->Update(avgsz);
1362  }
1363 
     // 'result' is either 1 or 2, so the summary below depends only on 'dbg'
1364  Int_t result = (bchanged_ds) ? 2 : 1;
1365  if (result > 0 && dbg)
1366  ::Info("TDataSetManager::ScanDataSet", "%d files 'new'; %d files touched;"
1367  " %d files disappeared", fopened, ftouched, fdisappeared);
1368 
1369  // Fill outputs, if required
1370  if (touched) *touched = ftouched;
1371  if (opened) *opened = fopened;
1372  if (disappeared) *disappeared = fdisappeared;
1373 
1374  // For real time monitoring
     // NOTE(review): no statement follows this comment in the listing (line 1375
     // is absent).
1376 
1377  return result;
1378 }
1379 
1380 ////////////////////////////////////////////////////////////////////////////////
1381 /// Check stage status of the file described by "fileInfo".
1382 /// fopt is same as "fopt" in TDataSetManager::ScanDataSet, which is repeated below:
1383 /// The int fopt controls which files have to be processed (or added to the list
1384 /// if ropt is 1 - see below); 'fopt' is defined in term of csopt and fsopt:
1385 /// fopt = sign(fsopt) * csopt * 100 + fsopt
1386 /// where 'fsopt' controls the actual selection
1387 /// -1 all files in the dataset
1388 /// 0 process only files marked as 'non-staged'
1389 /// >=1 as 0 but files that are marked 'staged' are open
1390 /// >=2 as 1 but files that are marked 'staged' are touched
1391 /// 10 process only files marked as 'staged'; files marked as 'non-staged'
1392 /// are ignored
1393 /// and 'csopt' controls if an actual check on the staged status (via TFileStager) is done
1394 /// 0 check that the file is staged using TFileStager
1395 /// 1 do not hard check the staged status
1396 /// (example: use fopt = -101 to check the staged status of all the files, or fopt = 110
1397 /// to re-check the stage status of all the files marked as staged)
1398 ///
1399 /// If 'dbg' is true, some information about the ongoing operations is regularly
1400 /// printed; this can be useful when processing very large datasets, an operation
1401 /// which can take a very long time.
1402 ///
1403 /// If maxfiles > 0, select for processing a maximum of 'maxfiles' files (but if fopt is 1 or 2
1404 /// all files marked as 'staged' are still open or touched)
1405 ///
1406 /// Return code
1407 /// kTRUE the file appears newly staged
1408 /// kFALSE otherwise
1409 ///
1410 /// changed is true if the fileinfo is modified
1411 /// touched is true if the file is open and read
1412 /// disappeared is true if the file is marked staged but actually not staged
1413 
1415  Int_t newstagedfiles, TFileStager* stager,
1416  Bool_t createStager, Bool_t dbg, Bool_t& changed,
1417  Bool_t& touched, Bool_t& disappeared)
1418 {
     // NOTE(review): the first line of the signature (return type, name and the
     // 'fileInfo', 'fopt' and 'maxfiles' arguments) is not visible in this listing.
1419  // File selection, Reopen and Touch options
     // 'allf' and 'checkstg' are derived from the full value of fopt; the other
     // flags from its remainder modulo 100 (non-negative values only)
1420  Bool_t allf = (fopt == -1) ? kTRUE : kFALSE;
1421  Bool_t checkstg = (fopt >= 100 || fopt < -1) ? kFALSE : kTRUE;
1422  if (fopt >= 0) fopt %= 100;
1423  Bool_t nonstgf = (fopt >= 0 && fopt < 10) ? kTRUE : kFALSE;
1424  Bool_t reopen = (fopt >= 1 && fopt < 10) ? kTRUE : kFALSE;
1425  Bool_t touch = (fopt >= 2 && fopt < 10) ? kTRUE : kFALSE;
1426  Bool_t stgf = (fopt == 10) ? kTRUE : kFALSE;
1427 
1428  changed = kFALSE;
1429  touched = kFALSE;
1430  disappeared = kFALSE;
1431 
1432  // Check which files have been staged, this can be replaced by a bulk command,
1433  // once it exists in the xrdclient
1434 
1435  if (!allf) {
1436 
1437  fileInfo->ResetUrl();
     // NOTE(review): the error message dereferences GetFirstUrl() without a check;
     // confirm that it cannot be null when GetCurrentUrl() is.
1438  if (!fileInfo->GetCurrentUrl()) {
1439  ::Error("TDataSetManager::CheckStagedStatus", "GetCurrentUrl() returned 0 for %s",
1440  fileInfo->GetFirstUrl()->GetUrl());
1441  return kFALSE;
1442  }
1443 
     // Files already flagged as staged: optionally re-open (and touch) them; this
     // branch always returns kFALSE, i.e. such files are never reported as new
1444  if (nonstgf && fileInfo->TestBit(TFileInfo::kStaged)) {
1445 
1446  // Skip files flagged as corrupted
1447  if (fileInfo->TestBit(TFileInfo::kCorrupted)) return kFALSE;
1448 
1449  // Skip if we are not asked to re-open the staged files
1450  if (!reopen) return kFALSE;
1451 
1452  // Set the URL removing the anchor (e.g. #AliESDs.root) because IsStaged()
1453  // and TFile::Open() with filetype=raw do not accept anchors
1454  TUrl *curl = fileInfo->GetCurrentUrl();
1455  const char *furl = curl->GetUrl();
1456  TString urlmod;
     // Use the re-mapped URL when a server mapping applies
1457  if (TDataSetManager::CheckDataSetSrvMaps(curl, urlmod) && !(urlmod.IsNull()))
1458  furl = urlmod.Data();
1459  TUrl url(furl);
1460  url.SetAnchor("");
1461 
1462  // Check if file is still available, if touch is set actually read from the file
     // NOTE(review): the raw-open options are appended to any existing options
     // without a '&' separator (ScanFile in this file inserts one) - confirm.
1463  TString uopt(url.GetOptions());
1464  uopt += "filetype=raw&mxredir=2";
1465  url.SetOptions(uopt.Data());
1466  TFile *file = TFile::Open(url.GetUrl());
1467  if (file) {
1468  if (touch) {
1469  // Actually access the file
1470  char tmpChar = 0;
     // A non-zero return from ReadBuffer is reported as a problem; the file is
     // counted as touched in any case
1471  if (file->ReadBuffer(&tmpChar, 1))
1472  ::Warning("TDataSetManager::CheckStagedStatus", "problems reading 1 byte from open file");
1473  // Count
1474  touched = kTRUE;
1475  }
1476  file->Close();
1477  delete file;
1478  } else {
1479  // File could not be opened, reset staged bit
1480  if (dbg) ::Info("TDataSetManager::CheckStagedStatus", "file %s disappeared", url.GetUrl());
1481  fileInfo->ResetBit(TFileInfo::kStaged);
1482  disappeared = kTRUE;
1483  changed = kTRUE;
1484 
1485  // Remove invalid URL, if other one left...
1486  if (fileInfo->GetNUrls() > 1)
1487  fileInfo->RemoveUrl(curl->GetUrl());
1488  }
1489  // Go to next
1490  return kFALSE;
1491  } else if (stgf && !(fileInfo->TestBit(TFileInfo::kStaged))) {
1492  // All staged files are processed: skip non staged
1493  return kFALSE;
1494  }
1495  }
1496 
1497  // Only open maximum number of 'new' files
1498  if (maxfiles > 0 && newstagedfiles >= maxfiles)
1499  return kFALSE;
1500 
1501  // Hard check of the staged status, if required
1502  if (checkstg) {
1503  // Set the URL removing the anchor (e.g. #AliESDs.root) because IsStaged()
1504  // and TFile::Open() with filetype=raw do not accept anchors
1505  TUrl *curl = fileInfo->GetCurrentUrl();
1506  const char *furl = curl->GetUrl();
1507  TString urlmod;
1508  Bool_t mapped = kFALSE;
1509  if (TDataSetManager::CheckDataSetSrvMaps(curl, urlmod) && !(urlmod.IsNull())) {
1510  furl = urlmod.Data();
1511  mapped = kTRUE;
1512  }
1513  TUrl url(furl);
1514  url.SetAnchor("");
1515 
1516  // Get the stager (either the global one or from the URL)
     // A stager created here is local to this call and deleted right after use
1517  stager = createStager ? TFileStager::Open(url.GetUrl()) : stager;
1518 
1519  Bool_t result = kFALSE;
1520  if (stager) {
1521  result = stager->IsStaged(url.GetUrl());
1522  if (gDebug > 0)
1523  ::Info("TDataSetManager::CheckStagedStatus", "IsStaged: %s: %d", url.GetUrl(), result);
1524  if (createStager)
1525  SafeDelete(stager);
1526  } else {
     // Without a stager the file is treated as not staged
1527  ::Warning("TDataSetManager::CheckStagedStatus",
1528  "could not get stager instance for '%s'", url.GetUrl());
1529  }
1530 
1531  // Go to next in case of failure
     // The staged bit is aligned with the outcome of the check; 'changed' is set
     // only when the bit actually flips
1532  if (!result) {
1533  if (fileInfo->TestBit(TFileInfo::kStaged)) {
1534  // Reset the bit
1535  fileInfo->ResetBit(TFileInfo::kStaged);
1536  changed = kTRUE;
1537  }
1538  return kFALSE;
1539  } else {
1540  if (!(fileInfo->TestBit(TFileInfo::kStaged))) {
1541  // Set the bit
1542  fileInfo->SetBit(TFileInfo::kStaged);
1543  changed = kTRUE;
1544  }
1545  }
1546 
1547  // If the url was re-mapped add the new url in front of the list
     // The options and anchor of the original URL are carried over
1548  if (mapped) {
1549  url.SetOptions(curl->GetOptions());
1550  url.SetAnchor(curl->GetAnchor());
1551  fileInfo->AddUrl(url.GetUrl(), kTRUE);
1552  }
1553  }
     // Reached when the file was selected and, if requested, found to be staged
1554  return kTRUE;
1555 }
1556 
1557 ////////////////////////////////////////////////////////////////////////////////
1558 /// Locate, stage, or fully validate file "fileInfo".
1559 
1560 void TDataSetManager::ProcessFile(TFileInfo *fileInfo, Int_t sopt, Bool_t checkstg, Bool_t doall,
1561  TFileStager* stager, Bool_t createStager, const char *stageopts,
1562  Bool_t dbg, Bool_t& changed, Bool_t& opened)
1563 {
1564  // File processing options
1565  //Bool_t noaction = (sopt == -1) ? kTRUE : kFALSE;
1566  Bool_t fullproc = (sopt == 0) ? kTRUE : kFALSE;
1567  Bool_t locateonly = (sopt == 1) ? kTRUE : kFALSE;
1568  Bool_t stageonly = (sopt == 2) ? kTRUE : kFALSE;
1569 
1570  changed = kFALSE;
1571  opened = kFALSE;
1572  Int_t rc = -1;
1573 
1574  // Set the URL removing the anchor (e.g. #AliESDs.root) because IsStaged()
1575  // and TFile::Open() with filetype=raw do not accept anchors
1576  TUrl *curl = fileInfo->GetCurrentUrl();
1577  const char *furl = curl->GetUrl();
1578  TString urlmod;
1579  //Bool_t mapped = kFALSE;
1580  if (TDataSetManager::CheckDataSetSrvMaps(curl, urlmod) && !(urlmod.IsNull())) {
1581  furl = urlmod.Data();
1582  //mapped = kTRUE;
1583  }
1584  TUrl url(furl);
1585  url.SetOptions("");
1586  url.SetAnchor("");
1587 
1588  if (createStager){
1589  if (!stager || (stager && !stager->Matches(url.GetUrl()))) {
1590  SafeDelete(stager);
1591  if (!(stager = TFileStager::Open(url.GetUrl())) || !(stager->IsValid())) {
1592  ::Error("TDataSetManager::ProcessFile",
1593  "could not get valid stager instance for '%s'", url.GetUrl());
1594  return;
1595  }
1596  }
1597  }
1598  // Locate the file, if just requested so
1599  if (locateonly) {
1600  TString eurl;
1601  if (stager && stager->Locate(url.GetUrl(), eurl) == 0) {
1602  TString opts(curl->GetOptions());
1603  TString anch(curl->GetAnchor());
1604  // Get the effective end-point Url
1605  curl->SetUrl(eurl);
1606  // Restore original options and anchor, if any
1607  curl->SetOptions(opts);
1608  curl->SetAnchor(anch);
1609  // Flag and count
1610  changed = kTRUE;
1611  opened = kTRUE;
1612  } else {
1613  // Failure
1614  ::Error("TDataSetManager::ProcessFile", "could not locate %s", url.GetUrl());
1615  }
1616 
1617  } else if (stageonly) {
1618  TString eurl;
1619  if (stager && !(stager->IsStaged(url.GetUrl()))) {
1620  if (!(stager->Stage(url.GetUrl(), stageopts))) {
1621  // Failure
1622  ::Error("TDataSetManager::ProcessFile",
1623  "problems issuing stage request for %s", url.GetUrl());
1624  }
1625  }
1626  } else if (fullproc) {
1627  TString eurl;
1628  // Full file validation
1629  rc = -2;
1630  Bool_t doscan = kTRUE;
1631  if (checkstg) {
1632  doscan = kFALSE;
1633  if ((doall && fileInfo->TestBit(TFileInfo::kStaged)) ||
1634  (stager && stager->IsStaged(url.GetUrl()))) doscan = kTRUE;
1635  }
1636  if (doscan) {
1637  if ((rc = TDataSetManager::ScanFile(fileInfo, dbg)) < -1) return;
1638  changed = kTRUE;
1639  } else if (stager) {
1640  ::Warning("TDataSetManager::ProcessFile",
1641  "required file '%s' does not look as being online (staged)", url.GetUrl());
1642  }
1643  if (rc < 0) return;
1644  // Count
1645  opened = kTRUE;
1646  }
1647  return;
1648 }
1649 
1650 ////////////////////////////////////////////////////////////////////////////////
1651 /// Open the file described by 'fileinfo' to extract the relevant meta-information.
1652 /// Return 0 if OK, -2 if the file cannot be opened, -1 if it is corrupted
1653 
1655 {
     // NOTE(review): the signature line is not visible in this listing; the body
     // uses the arguments 'fileinfo' and 'dbg'.
1656  Int_t rc = -2;
1657  // We need an input
1658  if (!fileinfo) {
1659  ::Error("TDataSetManager::ScanFile", "undefined input (!)");
1660  return rc;
1661  }
1662 
1663  TUrl *url = fileinfo->GetCurrentUrl();
1664 
1665  TFile *file = 0;
1666  Bool_t anchor = kFALSE;
1667 
1668  // Get timeout settings (default none)
     // The timeout is passed to TFile::Open as option only when positive
1669  Int_t timeout = gEnv->GetValue("DataSet.ScanFile.OpenTimeout", -1);
1670  TString fileopt;
1671  if (timeout > 0) fileopt.Form("TIMEOUT=%d", timeout);
1672 
1673  // To determine the size we have to open the file without the anchor
1674  // (otherwise we get the size of the contained file - in case of a zip archive)
1675  // We open in raw mode which makes sure that the opening succeeds, even if
1676  // the file is corrupted
1677  const char *furl = url->GetUrl();
1678  TString urlmod;
     // Use the re-mapped URL when a server mapping applies
1679  if (TDataSetManager::CheckDataSetSrvMaps(url, urlmod) && !(urlmod.IsNull()))
1680  furl = urlmod.Data();
1681  if (strlen(url->GetAnchor()) > 0) {
1682  anchor = kTRUE;
1683  // We need a raw open first to get the real size of the file
1684  TUrl urlNoAnchor(furl);
1685  urlNoAnchor.SetAnchor("");
     // Append 'filetype=raw' to the existing options, if any
1686  TString unaopts = urlNoAnchor.GetOptions();
1687  if (!unaopts.IsNull()) {
1688  unaopts += "&filetype=raw";
1689  } else {
1690  unaopts = "filetype=raw";
1691  }
1692  urlNoAnchor.SetOptions(unaopts);
1693  // Open with the configured timeout, if any; failure here returns -2
1694  if (!(file = TFile::Open(urlNoAnchor.GetUrl(), fileopt))) return rc;
1695 
1696  // Save some relevant info
1697  if (file->GetSize() > 0) fileinfo->SetSize(file->GetSize());
1698  fileinfo->SetBit(TFileInfo::kStaged);
1699 
1700  fileinfo->SetUUID(file->GetUUID().AsString());
1701 
1702  // Add url of the disk server in front of the list
1703  if (file->GetEndpointUrl()) {
1704  // add endpoint url if it is not a local file
1705  TUrl eurl(*(file->GetEndpointUrl()));
1706 
     // Accepted when the end-point protocol is not "file" or equals the
     // protocol of the current URL
1707  if (strcmp(eurl.GetProtocol(), "file") ||
1708  !strcmp(eurl.GetProtocol(), url->GetProtocol())) {
1709 
1710  eurl.SetOptions(url->GetOptions());
1711  eurl.SetAnchor(url->GetAnchor());
1712 
1713  // Fix the hostname
1714  if (!strcmp(eurl.GetHost(), "localhost") || !strcmp(eurl.GetHost(), "127.0.0.1") ||
1715  !strcmp(eurl.GetHost(), "localhost.localdomain")) {
1716  eurl.SetHost(TUrl(gSystem->HostName()).GetHostFQDN());
1717  }
1718  // Add only if different
1719  if (strcmp(eurl.GetUrl(), url->GetUrl()))
1720  fileinfo->AddUrl(eurl.GetUrl(), kTRUE);
1721 
     // NOTE(review): this message is printed even when the URL was not added
     // because it equals the current one.
1722  if (gDebug > 0) ::Info("TDataSetManager::ScanFile", "added URL %s", eurl.GetUrl());
1723  }
1724  } else {
1725  ::Warning("TDataSetManager::ScanFile", "end-point URL undefined for file %s", file->GetName());
1726  }
1727 
1728  file->Close();
1729  delete file;
1730  }
1731 
1732  // OK, set the relevant flags
     // From here on a failure is reported as -1
1733  rc = -1;
1734 
1735  // Disable warnings when reading a tree without loading the corresponding library
     // NOTE(review): the statement changing gErrorIgnoreLevel is not visible in
     // this listing (line 1737 is absent); only the save and restore are shown.
1736  Int_t oldLevel = gErrorIgnoreLevel;
1738 
1739  // Open the file itself (anchor included), with the configured timeout, if any
1740  if (!(file = TFile::Open(url->GetUrl(), fileopt))) {
1741  // If the file could be opened before, but fails now it is corrupt...
1742  if (dbg) ::Info("TDataSetManager::ScanFile", "marking %s as corrupt", url->GetUrl());
1743  fileinfo->SetBit(TFileInfo::kCorrupted);
1744  // Set back old warning level
1745  gErrorIgnoreLevel = oldLevel;
1746  return rc;
1747  } else if (!anchor) {
1748  // Do the relevant settings
1749  if (file->GetSize() > 0) fileinfo->SetSize(file->GetSize());
1750  fileinfo->SetBit(TFileInfo::kStaged);
1751 
1752  // Add url of the disk server in front of the list if it is not a local file
     // NOTE(review): GetEndpointUrl() is dereferenced without the null check done
     // in the anchor branch above - confirm that it cannot be null here.
1753  TUrl eurl(*(file->GetEndpointUrl()));
1754 
1755  if (strcmp(eurl.GetProtocol(), "file") ||
1756  !strcmp(eurl.GetProtocol(), url->GetProtocol())) {
1757 
1758  eurl.SetOptions(url->GetOptions());
1759  eurl.SetAnchor(url->GetAnchor());
1760 
1761  // Fix the hostname
1762  if (!strcmp(eurl.GetHost(), "localhost") || !strcmp(eurl.GetHost(), "127.0.0.1") ||
1763  !strcmp(eurl.GetHost(), "localhost.localdomain")) {
1764  eurl.SetHost(TUrl(gSystem->HostName()).GetHostFQDN());
1765  }
1766  // Add only if different
1767  if (strcmp(eurl.GetUrl(), url->GetUrl()))
1768  fileinfo->AddUrl(eurl.GetUrl(), kTRUE);
1769 
1770  if (gDebug > 0) ::Info("TDataSetManager::ScanFile", "added URL %s", eurl.GetUrl());
1771  }
1772  fileinfo->SetUUID(file->GetUUID().AsString());
1773  }
1774  rc = 0;
1775 
1776  // Loop over all entries and create/update corresponding metadata.
1777  // TODO If we cannot read some of the trees, is the file corrupted as well?
     // Any failure while collecting the metadata flags the file as corrupted
1778  if ((rc = TDataSetManager::FillMetaData(fileinfo, file, "/")) != 0) {
1779  ::Error("TDataSetManager::ScanFile",
1780  "problems processing the directory tree in looking for metainfo");
1781  fileinfo->SetBit(TFileInfo::kCorrupted);
1782  rc = -1;
1783  }
1784  // Set back old warning level
1785  gErrorIgnoreLevel = oldLevel;
1786 
1787  file->Close();
1788  delete file;
1789 
1790  // Done
1791  return rc;
1792 }
1793 
1794 ////////////////////////////////////////////////////////////////////////////////
1795 /// Navigate the directory 'd' (and its subdirectories) looking for TTree objects.
1796 /// Fill in the relevant metadata information in 'fi'. The name of the TFileInfoMeta
1797 /// metadata entry will be "/dir1/dir2/.../tree_name".
1798 /// Return 0 on success, -1 if any problem happens (object found in keys cannot be read,
1799 /// for example)
1800 
1802 {
     // NOTE(review): the signature line is not visible in this listing; the body
     // uses the arguments 'fi', 'd' and 'rdir'.
1803  // Check inputs
1804  if (!fi || !d || !rdir) {
1805  ::Error("TDataSetManager::FillMetaData",
1806  "some inputs are invalid (fi:%p,d:%p,r:%s)", fi, d, rdir);
1807  return -1;
1808  }
1809 
1810  if (d->GetListOfKeys()) {
1811  TIter nxk(d->GetListOfKeys());
1812  TKey *k = 0;
     // The loop ends at the first entry that is not a TKey
1813  while ((k = dynamic_cast<TKey *> (nxk()))) {
1814 
     // NOTE(review): the result of TClass::GetClass() is dereferenced without a
     // null check, here and in the tree test below - confirm that it cannot be
     // null for the class names stored in the keys.
1815  if (TClass::GetClass(k->GetClassName())->InheritsFrom(TDirectory::Class())) {
1816  // Get the directory
1817  TDirectory *sd = (TDirectory *) d->Get(k->GetName());
1818  if (!sd) {
1819  ::Error("TDataSetManager::FillMetaData", "cannot get sub-directory '%s'", k->GetName());
1820  return -1;
1821  }
     // Recurse with the sub-directory name appended to the relative path
1822  if (TDataSetManager::FillMetaData(fi, sd, TString::Format("%s%s/", rdir, k->GetName())) != 0) {
1823  ::Error("TDataSetManager::FillMetaData", "problems processing sub-directory '%s'", k->GetName());
1824  return -1;
1825  }
1826 
1827  } else {
1828  // We process only trees
1829  if (!TClass::GetClass(k->GetClassName())->InheritsFrom(TTree::Class())) continue;
1830 
     // Metadata entry name: relative path followed by the key name
1831  TString ks;
1832  ks.Form("%s%s", rdir, k->GetName());
1833 
1834  TFileInfoMeta *md = fi->GetMetaData(ks);
1835  if (!md) {
1836  // Create it
1837  md = new TFileInfoMeta(ks, k->GetClassName());
1838  fi->AddMetaData(md);
1839  if (gDebug > 0)
1840  ::Info("TDataSetManager::FillMetaData", "created meta data for tree %s", ks.Data());
1841  }
1842  // Fill values
1843  TTree *t = dynamic_cast<TTree *> (d->Get(k->GetName()));
1844  if (t) {
     // The byte counters are updated only when the number of entries is valid
1845  if (t->GetEntries() >= 0) {
1846  md->SetEntries(t->GetEntries());
1847  if (t->GetTotBytes() >= 0)
1848  md->SetTotBytes(t->GetTotBytes());
1849  if (t->GetZipBytes() >= 0)
1850  md->SetZipBytes(t->GetZipBytes());
1851  }
1852  } else {
1853  ::Error("TDataSetManager::FillMetaData", "could not get tree '%s'", k->GetName());
1854  return -1;
1855  }
1856  }
1857  }
1858  }
1859  // Done
1860  return 0;
1861 }
1862 
1863 ////////////////////////////////////////////////////////////////////////////////
1864 /// Create a server mapping list from the content of 'srvmaps'
1865 /// Return the list (owned by the caller) or 0 if no valid info could be found
1866 
1868 {
     // NOTE(review): the signature line is not visible in this listing; the body
     // uses the argument 'srvmaps'.
     // 'srvmaps' is a space-separated list of tokens, each either "<from>|<to>"
     // or just "<to>". Each valid token becomes a TPair in the returned list.
1869  TList *srvmapslist = 0;
1870  if (srvmaps.IsNull()) {
1871  ::Warning("TDataSetManager::ParseDataSetSrvMaps",
1872  "called with an empty string! - nothing to do");
1873  return srvmapslist;
1874  }
1875  TString srvmap, sf, st;
1876  Int_t from = 0, from1 = 0;
1877  while (srvmaps.Tokenize(srvmap, from, " ")) {
1878  sf = ""; st = "";
1879  if (srvmap.Contains("|")) {
     // Split into source ('sf') and target ('st'); the empty body is intentional,
     // the Tokenize calls are made for their side effect of filling sf and st
1880  from1 = 0;
1881  if (srvmap.Tokenize(sf, from1, "|"))
1882  if (srvmap.Tokenize(st, from1, "|")) { }
1883  } else {
     // No separator: the whole token is the target and the source stays empty
1884  st = srvmap;
1885  }
     // The target is mandatory and is always stored with a trailing '/'
1886  if (st.IsNull()) {
1887  ::Warning("TDataSetManager::ParseDataSetSrvMaps",
1888  "parsing DataSet.SrvMaps: target must be defined"
1889  " (token: %s) - ignoring", srvmap.Data());
1890  continue;
1891  } else if (!(st.EndsWith("/"))) {
1892  st += "/";
1893  }
1894  // TUrl if wildcards or TObjString
1895  TString sp;
1896  TUrl *u = 0;
1897  if (!(sf.IsNull()) && sf.Contains("*")) {
     // Protocol "root" and port 1094 are applied when the source string does not
     // spell them out; the TUrl is kept only if the wildcard is in the host name
1898  u = new TUrl(sf);
1899  if (!(sf.BeginsWith(u->GetProtocol()))) u->SetProtocol("root");
1900  sp.Form(":%d", u->GetPort());
1901  if (!(sf.Contains(sp))) u->SetPort(1094);
1902  if (!TString(u->GetHost()).Contains("*")) SafeDelete(u);
1903  }
     // The list is created on the first valid token
1904  if (!srvmapslist) srvmapslist = new TList;
1905  if (u) {
1906  srvmapslist->Add(new TPair(u, new TObjString(st)));
1907  } else {
1908  srvmapslist->Add(new TPair(new TObjString(sf), new TObjString(st)));
1909  }
1910  }
1911  // Done
     // The list is flagged as owner of the pairs added above
1912  if (srvmapslist) srvmapslist->SetOwner(kTRUE);
1913  return srvmapslist;
1914 }
1915 
1916 ////////////////////////////////////////////////////////////////////////////////
1917 /// Static getter for server mapping list
1918 
1920 {
     // Returns the pointer stored in fgDataSetSrvMaps as is (no copy is made)
1921  return fgDataSetSrvMaps;
1922 }
1923 
1924 ////////////////////////////////////////////////////////////////////////////////
1925 /// Check if the dataset server mappings apply to the url defined by 'furl'.
1926 /// Use srvmaplist if defined, else use the default list.
1927 /// If yes, resolve the mapping into file1 and return kTRUE.
1928 /// Otherwise return kFALSE.
1929 
1931 {
     // NOTE(review): the signature line is not visible in this listing; the body
     // uses the arguments 'furl', 'file1' and 'srvmaplist'.
1932  Bool_t replaced = kFALSE;
1933  if (!furl) return replaced;
1934 
1935  const char *file = furl->GetUrl();
     // Use the list passed by the caller, or the default one
1936  TList *mlist = (srvmaplist) ? srvmaplist : fgDataSetSrvMaps;
1937  if (mlist && mlist->GetSize() > 0) {
     // The entries are TPairs; the first matching one with a string target wins
1938  TIter nxm(mlist);
1939  TPair *pr = 0;
1940  while ((pr = (TPair *) nxm())) {
1941  Bool_t replace = kFALSE;
1942  // If TUrl apply reg exp on host
1943  TUrl *u = dynamic_cast<TUrl *>(pr->Key());
1944  if (u) {
     // TUrl key: the protocols must be equal; for "file" the wildcard pattern is
     // matched against the file and options, otherwise the ports must be equal
     // and the pattern is matched against the host. The match must start at
     // position 0.
1945  if (!strcmp(u->GetProtocol(), furl->GetProtocol())) {
1946  Ssiz_t len;
1947  if (!strcmp(u->GetProtocol(), "file")) {
1948  TRegexp re(u->GetFileAndOptions(), kTRUE);
1949  if (re.Index(furl->GetFileAndOptions(), &len) == 0) replace = kTRUE;
1950  } else {
1951  if (u->GetPort() == furl->GetPort()) {
1952  TRegexp re(u->GetHost(), kTRUE);
1953  if (re.Index(furl->GetHost(), &len) == 0) replace = kTRUE;
1954  }
1955  }
1956  }
1957  } else {
     // String key: an empty key matches everything, otherwise the full URL
     // must begin with the key
1958  TObjString *os = dynamic_cast<TObjString *>(pr->Key());
1959  if (os) {
1960  if (os->GetString().IsNull() ||
1961  !strncmp(file, os->GetName(), os->GetString().Length())) replace = kTRUE;
1962  }
1963  }
1964  if (replace) {
     // The result is the target prefix followed by the file and options of 'furl'
1965  TObjString *ost = dynamic_cast<TObjString *>(pr->Value());
1966  if (ost) {
1967  file1.Form("%s%s", ost->GetName(), furl->GetFileAndOptions());
1968  replaced = kTRUE;
1969  break;
1970  }
1971  }
1972  }
1973  }
1974  // Done
1975  return replaced;
1976 }
1977 
1978 ////////////////////////////////////////////////////////////////////////////////
1979 /// Update scan counters
1980 
1982 {
     // Each counter is updated only when the corresponding argument is > -1;
     // otherwise the current value is kept.
     // NOTE(review): the signature line and line 1985 are not visible in this
     // listing - check the real source for the remaining counter.
1983  fNTouchedFiles = (t > -1) ? t : fNTouchedFiles;
1984  fNOpenedFiles = (o > -1) ? o : fNOpenedFiles;
1986 }
static Bool_t CheckDataSetSrvMaps(TUrl *furl, TString &fn, TList *srvmaplist=0)
Check if the dataset server mappings apply to the url defined by 'furl'.
virtual Bool_t IsStaged(const char *)
Just check if the file exists locally.
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1291
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:862
void SetPort(Int_t port)
Definition: TUrl.h:90
virtual Int_t ClearCache(const char *uri)
Clear cached information matching uri.
long long Long64_t
Definition: RtypesCore.h:71
void SetUUID(const char *uuid)
Set the UUID to the value associated to the string 'uuid'.
Definition: TFileInfo.cxx:237
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3943
R__EXTERN Int_t gErrorIgnoreLevel
Definition: TError.h:105
static TList * fgDataSetSrvMaps
void SetProtocol(const char *proto, Bool_t setDefaultPort=kFALSE)
Set protocol and, optionally, change the port accordingly.
Definition: TUrl.cxx:520
virtual void ParseInitOpts(const char *opts)
Parse the opts string and set the init bits accordingly Available options: Cq: set kCheckQuota Ar: se...
TString fGroupConfigFile
TLine * line
Collectable string class.
Definition: TObjString.h:28
float Float_t
Definition: RtypesCore.h:55
Long64_t GetTotalSize() const
const Ssiz_t kNPOS
Definition: RtypesCore.h:113
This class represents a WWW compatible URL.
Definition: TUrl.h:35
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
Bool_t RemoveUrl(const char *url)
Remove an URL. Returns kTRUE if successful, kFALSE otherwise.
Definition: TFileInfo.cxx:320
int GetPathInfo(const char *path, Long_t *id, Long_t *size, Long_t *flags, Long_t *modtime)
Get info about a file: id, size, flags, modification time.
Definition: TSystem.cxx:1393
TUrl * GetCurrentUrl() const
Return the current url.
Definition: TFileInfo.cxx:248
const char * GetProtocol() const
Definition: TUrl.h:66
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
virtual Bool_t RemoveDataSet(const char *uri)
Removes the indicated dataset.
void SetUrl(const char *url, Bool_t defaultIsFile=kFALSE)
Parse url character string and split in its different subcomponents.
Definition: TUrl.cxx:108
void SetEntries(Long64_t entries)
Definition: TFileInfo.h:144
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:53
virtual Int_t GetEntries() const
Definition: TCollection.h:177
virtual TFileCollection * GetDataSet(const char *uri, const char *server=0)
Utility function used in various methods for user dataset upload.
Regular expression class.
Definition: TRegexp.h:31
const char * GetFileAndOptions() const
Return the file and its options (the string specified behind the ?).
Definition: TUrl.cxx:501
void Add(TObject *obj)
This function may not be used (but we need to provide it since it is a pure virtual in TCollection)...
Definition: TMap.cxx:54
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
R__ALWAYS_INLINE Bool_t TestBit(UInt_t f) const
Definition: TObject.h:172
void SetScanCounters(Int_t t=-1, Int_t o=-1, Int_t d=-1)
Update scan counters.
virtual ~TDataSetManager()
Destructor.
void SetVal(const AParamType &val)
Definition: TParameter.h:71
Basic string class.
Definition: TString.h:131
static Long64_t ToBytes(const char *size=0)
Static utility function to get the number of bytes from a string representation in the form "<digit><s...
#define f(i)
Definition: RSha256.hxx:104
const char * GetOptions() const
Definition: TUrl.h:73
virtual void ShowDataSets(const char *uri="*", const char *opt="")
Prints formatted information about the dataset 'uri'.
This class represents a RFC 3986 compatible URI.
Definition: TUri.h:35
Long_t fMtime
Definition: TSystem.h:130
static TString CreateUri(const char *dsGroup=0, const char *dsUser=0, const char *dsName=0, const char *dsTree=0)
Creates URI for the dataset manager in the form '[[/dsGroup/]dsUser/]dsName[#dsObjPath]'. The optional dsObjPath can be in the form '[subdir/]objname'.
virtual TMap * GetGroupQuotaMap()
Bool_t AddUrl(const char *url, Bool_t infront=kFALSE)
Add a new URL.
Definition: TFileInfo.cxx:295
static void ProcessFile(TFileInfo *fileInfo, Int_t sopt, Bool_t checkstg, Bool_t doall, TFileStager *stager, Bool_t createStager, const char *stageopts, Bool_t dbg, Bool_t &changed, Bool_t &opened)
Locate, stage, or fully validate file "fileInfo".
void SetBit(UInt_t f, Bool_t set)
Set or unset the user status bits as specified in f.
Definition: TObject.cxx:694
const char * GetUrl(Bool_t withDeflt=kFALSE) const
Return full URL.
Definition: TUrl.cxx:387
const TString GetPath() const
Definition: TUri.h:87
virtual void MonitorUsedSpace(TVirtualMonitoringWriter *monitoring)
Log info to the monitoring server.
static struct mg_connection * fc(struct mg_context *ctx)
Definition: civetweb.c:3728
const char * GetHost() const
Definition: TUrl.h:69
Int_t Update(Long64_t avgsize=-1)
Update accumulated information about the elements of the collection (e.g.
Bool_t SetUri(const TString &uri)
TDataSetManager(const TDataSetManager &)
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2311
Bool_t AddMetaData(TObject *meta)
Adds a meta data object to the file info object.
Definition: TFileInfo.cxx:384
void Class()
Definition: Class.C:29
Int_t GetNUrls() const
Definition: TFileInfo.h:72
virtual void ShowQuota(const char *opt)
Display quota information.
TUrl * GetFirstUrl() const
Definition: TFileInfo.h:69
void SetSize(Long64_t size)
Definition: TFileInfo.h:83
virtual Int_t RegisterDataSet(const char *uri, TFileCollection *dataSet, const char *opt)
Register a dataset, performing quota checks, if needed.
Long_t fMTimeGroupConfig
void DeleteAll()
Remove all (key,value) pairs from the map AND delete the keys AND values when they are allocated on t...
Definition: TMap.cxx:168
virtual Long64_t GetGroupQuota(const char *group)
returns the quota a group is allowed to have
static TList * GetDataSetSrvMaps()
Static getter for server mapping list.
virtual const char * Getenv(const char *env)
Get environment variable.
Definition: TSystem.cxx:1658
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
Definition: TString.cxx:2177
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition: TSystem.cxx:1594
const char * GetAnchor() const
Definition: TUrl.h:72
static constexpr double s
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:28
const TString GetFragment() const
Definition: TUri.h:89
R__EXTERN Int_t gDebug
Definition: RtypesCore.h:117
static Bool_t CheckStagedStatus(TFileInfo *fileInfo, Int_t fopt, Int_t maxfiles, Int_t newstagedfiles, TFileStager *stager, Bool_t createStager, Bool_t dbg, Bool_t &changed, Bool_t &touched, Bool_t &disappeared)
Check stage status of the file described by "fileInfo".
TObject * Value() const
Definition: TMap.h:121
A doubly linked list.
Definition: TList.h:44
TFileInfoMeta * GetMetaData(const char *meta=0) const
Get meta data object with specified name.
Definition: TFileInfo.cxx:424
const char * GetName() const
Returns name of object.
Definition: TObjString.h:38
void PrintUsedSpace()
Prints the quota.
virtual void UpdateUsedSpace()
updates the used space maps
const TString & GetString() const
Definition: TObjString.h:46
virtual TMap * GetSubDataSets(const char *uri, const char *excludeservers)
Partition dataset 'ds' according to the servers.
TString fUser
Definition: TSystem.h:140
static Int_t FillMetaData(TFileInfo *fi, TDirectory *d, const char *rdir="/")
Navigate the directory 'd' (and its subdirectories) looking for TTree objects.
R__EXTERN TSystem * gSystem
Definition: TSystem.h:556
static constexpr double pc
Int_t RemoveDuplicates()
Remove duplicates based on the UUID, typically after a verification.
TObject * Key() const
Definition: TMap.h:120
THashList * GetList()
Bool_t ReadGroupConfig(const char *cf=0)
Read group config file 'cf'.
virtual TMap * GetUserUsedMap()
virtual Bool_t ExistsDataSet(const char *uri)
Checks if the indicated dataset exists.
virtual Bool_t InheritsFrom(const char *classname) const
Returns kTRUE if object inherits from class "classname".
Definition: TObject.cxx:443
Bool_t ParseUri(const char *uri, TString *dsGroup=0, TString *dsUser=0, TString *dsName=0, TString *dsTree=0, Bool_t onlyCurrent=kFALSE, Bool_t wildcards=kFALSE)
Parses a (relative) URI that describes a DataSet on the cluster.
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
TObject * Next()
Definition: TCollection.h:249
virtual Int_t ShowCache(const char *uri)
Show cached information matching uri.
Bool_t HasQuery() const
Definition: TUri.h:98
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2289
unsigned int UInt_t
Definition: RtypesCore.h:44
Int_t ScanDataSet(const char *uri, const char *opt)
Scans the dataset indicated by 'uri' following the 'opts' directives.
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:888
char * Form(const char *fmt,...)
Ssiz_t Length() const
Definition: TString.h:405
virtual Bool_t SendParameters(TList *, const char *=0)
Long64_t fAvgFileSize
TString & String()
Definition: TObjString.h:48
static Int_t ScanFile(TFileInfo *fileinfo, Bool_t notify)
Open the file described by 'fileinfo' to extract the relevant meta-information.
const Bool_t kFALSE
Definition: RtypesCore.h:90
void GetQuota(const char *group, const char *user, const char *dsName, TFileCollection *dataset)
Gets quota information from this dataset.
virtual Bool_t Stage(const char *, Option_t *=0)
Definition: TFileStager.h:47
#define SafeDelete(p)
Definition: RConfig.hxx:543
void SetHost(const char *host)
Definition: TUrl.h:86
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
long Long_t
Definition: RtypesCore.h:52
#define d(i)
Definition: RSha256.hxx:102
Class used by TMap to store (key,value) pairs.
Definition: TMap.h:102
#define DSM_ONE_GB
void SetAnchor(const char *anchor)
Definition: TUrl.h:88
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2197
#define ClassImp(name)
Definition: Rtypes.h:361
static TString fgCommonDataSetTag
virtual const char * HostName()
Return the system's host name.
Definition: TSystem.cxx:301
void Printf(const char *fmt,...)
Describe directory structure in memory.
Definition: TDirectory.h:40
TMap implements an associative array of (key,value) pairs using a THashTable for efficient retrieval ...
Definition: TMap.h:40
int type
Definition: TGX11.cxx:120
R__EXTERN TEnv * gEnv
Definition: TEnv.h:171
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
static TFileStager * Open(const char *stager)
Open a stager, after having loaded the relevant plug-in.
virtual void DispatchOneEvent(Bool_t pendingOnly=kFALSE)
Dispatch a single event.
Definition: TSystem.cxx:427
virtual Int_t NotifyUpdate(const char *group=0, const char *user=0, const char *dspath=0, Long_t mtime=0, const char *checksum=0)
Save into the <datasetdir>/dataset.list file the name of the last updated or created or modified data...
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:418
static constexpr double nm
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2923
void ResetUrl()
Definition: TFileInfo.h:66
Int_t GetPort() const
Definition: TUrl.h:80
virtual Long64_t GetEntries() const
Definition: TTree.h:454
Bool_t IsNull() const
Definition: TString.h:402
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Definition: TPRegexp.cxx:708
Mother of all ROOT objects.
Definition: TObject.h:37
void SetTotBytes(Long64_t tot)
Definition: TFileInfo.h:147
virtual Long64_t GetTotBytes() const
Definition: TTree.h:507
static TUri Transform(const TUri &reference, const TUri &base)
virtual void Add(TObject *obj)
Definition: TList.h:87
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Definition: TPRegexp.h:97
Class that contains a list of TFileInfo's and accumulated meta data information about its entries...
Definition: file.py:1
void PrintDataSet(TFileCollection *fc, Int_t popt=0)
Formatted printout of the content of TFileCollection 'fc'.
void SetZipBytes(Long64_t zip)
Definition: TFileInfo.h:148
virtual Long64_t GetZipBytes() const
Definition: TTree.h:536
void SetOptions(const char *opt)
Definition: TUrl.h:89
const Int_t kError
Definition: TError.h:39
static TList * ParseDataSetSrvMaps(const TString &srvmaps)
Create a server mapping list from the content of 'srvmaps'. Return the list (owned by the caller) or 0...
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
Definition: TString.cxx:476
TObject * GetValue(const char *keyname) const
Returns a pointer to the value associated with keyname as name of the key.
Definition: TMap.cxx:236
virtual Bool_t Matches(const char *s)
Definition: TFileStager.h:46
Definition: tree.py:1
Bool_t IsDigit() const
Returns true if all characters in string are digits (0-9) or white spaces, i.e.
Definition: TString.cxx:1763
A TTree represents a columnar dataset.
Definition: TTree.h:78
const AParamType & GetVal() const
Definition: TParameter.h:69
Class describing a generic file including meta information.
Definition: TFileInfo.h:36
void ResetBit(UInt_t f)
Definition: TObject.h:171
const TString GetQuery() const
Definition: TUri.h:88
virtual Bool_t IsValid() const
Definition: TFileStager.h:50
virtual Int_t Locate(const char *u, TString &f)
Just check if the file exists locally.
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:182
Int_t Match(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10, TArrayI *pos=0)
The number of matches is returned, this equals the full match + sub-pattern matches.
Definition: TPRegexp.cxx:339
virtual Int_t GetValue(const char *name, Int_t dflt) const
Returns the integer value for a resource.
Definition: TEnv.cxx:491
virtual Long64_t GetGroupUsed(const char *group)
Returns the used space of that group.
const Bool_t kTRUE
Definition: RtypesCore.h:89
virtual TMap * GetDataSets(const char *uri, UInt_t=TDataSetManager::kExport)
Returns all datasets for the <group> and <user> specified by <uri>.
void AbstractMethod(const char *method) const
Use this method to implement an "abstract" method that you don't want to leave purely abstract...
Definition: TObject.cxx:930
char name[80]
Definition: TGX11.cxx:109
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:874
const char * Data() const
Definition: TString.h:364