ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
TDataSetManager.cxx
Go to the documentation of this file.
1 // @(#)root/base:$Id$
2 // Author: Jan Fiete Grosse-Oetringhaus, 04.06.07
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////////////
13 // //
14 // TDataSetManager //
15 // //
16 // This class contains functions to handle datasets in PROOF //
17 // It is the layer between TProofServ and the file system that stores //
18 // the datasets. //
19 // //
20 //////////////////////////////////////////////////////////////////////////
21 
22 
23 #include "TDataSetManager.h"
24 
25 #include "Riostream.h"
26 
27 #include "TEnv.h"
28 #include "TError.h"
29 #include "TFile.h"
30 #include "TFileCollection.h"
31 #include "TFileInfo.h"
32 #include "TFileStager.h"
33 #include "TMD5.h"
34 #include "THashList.h"
35 #include "TKey.h"
36 #include "TObjArray.h"
37 #include "TObjString.h"
38 #include "TParameter.h"
39 #include "TPRegexp.h"
40 #include "TRegexp.h"
41 #include "TSystem.h"
42 #include "TTree.h"
43 #include "TUrl.h"
44 #include "TVirtualMonitoring.h"
45 
46 // One Gigabyte
47 #define DSM_ONE_GB (1073741824)
48 
49 // Name for common datasets
50 TString TDataSetManager::fgCommonDataSetTag = "COMMON";
51 TList *TDataSetManager::fgDataSetSrvMaps = 0;
52 
54 
55 ////////////////////////////////////////////////////////////////////////////////
56 ///
57 /// Main constructor
58 
59 TDataSetManager::TDataSetManager(const char *group, const char *user,
60  const char *options)
61  : fGroup(group),
62  fUser(user), fCommonUser(), fCommonGroup(),
63  fGroupQuota(), fGroupUsed(),
64  fUserUsed(), fNTouchedFiles(0), fNOpenedFiles(0),
65  fNDisappearedFiles(0), fMTimeGroupConfig(-1)
66 {
67  // Fill default group and user if none is given
68  if (fGroup.IsNull())
69  fGroup = "default";
70  if (fUser.IsNull()) {
71  fUser = "--nouser--";
72  // Get user logon name
74  if (pw) {
75  fUser = pw->fUser;
76  delete pw;
77  }
78  }
79 
80  fGroupQuota.SetOwner();
81  fGroupUsed.SetOwner();
82  fUserUsed.SetOwner();
83 
84  fCommonUser = "COMMON";
85  fCommonGroup = "COMMON";
86 
87  fNTouchedFiles = -1;
88  fNOpenedFiles = -1;
89  fNDisappearedFiles = -1;
90  fMTimeGroupConfig = -1;
91 
92  fAvgFileSize = 50000000; // Default 50 MB per file
93 
94  // Parse options
95  ParseInitOpts(options);
96 
97  if (!fUser.IsNull() && !fGroup.IsNull()) {
98 
99  // If not in sandbox, construct the base URI using session defaults
100  // (group, user) (syntax: /group/user/dsname[#[subdir/]objname])
101  if (!TestBit(TDataSetManager::kIsSandbox))
102  fBase.SetUri(TString(Form("/%s/%s/", fGroup.Data(), fUser.Data())));
103 
104  }
105 
106  // List of dataset server mapping instructions
107  TString srvmaps(gEnv->GetValue("DataSet.SrvMaps",""));
108  TString srvmapsenv(gSystem->Getenv("DATASETSRVMAPS"));
109  if (!(srvmapsenv.IsNull())) {
110  if (srvmapsenv.BeginsWith("+")) {
111  if (!(srvmaps.IsNull())) srvmaps += ",";
112  srvmaps += srvmapsenv(1,srvmapsenv.Length());
113  } else {
114  srvmaps = srvmapsenv;
115  }
116  }
117  if (!(srvmaps.IsNull()) && !(fgDataSetSrvMaps = ParseDataSetSrvMaps(srvmaps)))
118  Warning("TDataSetManager", "problems parsing DataSet.SrvMaps input info (%s)"
119  " - ignoring", srvmaps.Data());
120 
121  // Read config file
122  ReadGroupConfig(gEnv->GetValue("Proof.GroupFile", ""));
123 }
124 
125 ////////////////////////////////////////////////////////////////////////////////
126 /// Destructor
127 
129 {
130  // Clear used space
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 /// Parse the opts string and set the init bits accordingly
138 /// Available options:
139 /// Cq: set kCheckQuota
140 /// Ar: set kAllowRegister
141 /// Av: set kAllowVerify
142 /// Ti: set kTrustInfo
143 /// Sb: set kIsSandbox
144 /// Ca: set kUseCache or kDoNotUseCache
145 /// The opts string may also contain additional unrelated info: in such a case
146 /// the field delimited by the prefix "opt:" is analyzed, e.g. if opts is
147 /// "/tmp/dataset opt:Cq:-Ar: root://lxb6046.cern.ch" only the substring
148 /// "Cq:-Ar:" will be parsed .
149 
150 void TDataSetManager::ParseInitOpts(const char *opts)
151 {
152  // Default option bits
160 
161  if (opts && strlen(opts) > 0) {
162  TString opt(opts);
163  // If it contains the prefix "opt:", isolate the related field
164  Int_t ip = opt.Index("opt:");
165  if (ip != kNPOS) opt.Remove(0, ip + 4);
166  ip = opt.Index(" ");
167  if (ip != kNPOS) opt.Remove(ip);
168  // Check the content, now
169  if (opt.Contains("Cq:") && !opt.Contains("-Cq:"))
171  if (opt.Contains("-Ar:"))
173  if (opt.Contains("-Av:"))
175  if (opt.Contains("-Ti:"))
177  if (opt.Contains("Sb:") && !opt.Contains("-Sb:"))
179  if (opt.Contains("Ca:"))
181  if (opt.Contains("-Ca:"))
183  }
184 
185  // Check dependencies
187  // Dataset verification or requires registration permition
189  }
190  // UseCache has priority
193 }
194 
195 ////////////////////////////////////////////////////////////////////////////////
196 /// Read group config file 'cf'.
197 /// If cf == 0 re-read, if changed, the file pointed by fGroupConfigFile .
198 ///
199 /// expects the following directives:
200 /// Group definition:
201 /// group <groupname> <user>+
202 /// disk quota
203 /// property <groupname> diskquota <quota in GB>
204 /// average filesize (to be used when the file size is not available)
205 /// averagefilesize <average size>{G,g,M,m,K,k}
206 
208 {
209  // Validate input
210  FileStat_t st;
211  if (!cf || (strlen(cf) <= 0) || !strcmp(cf, fGroupConfigFile.Data())) {
212  // If this is the first time we cannot do anything
213  if (fGroupConfigFile.IsNull()) {
214  if (gDebug > 0)
215  Info("ReadGroupConfig", "path to config file undefined - nothing to do");
216  return kFALSE;
217  }
218  // Check if fGroupConfigFile has changed
220  Error("ReadGroupConfig", "could not stat %s", fGroupConfigFile.Data());
221  return kFALSE;
222  }
223  if (st.fMtime <= fMTimeGroupConfig) {
224  if (gDebug > 0)
225  Info("ReadGroupConfig","file has not changed - do nothing");
226  return kTRUE;
227  }
228  }
229 
230  // Either new file or the file has changed
231  if (cf && (strlen(cf) > 0)) {
232  // The file must exist and be readable
233  if (gSystem->GetPathInfo(cf, st)) {
234  Error("ReadGroupConfig", "could not stat %s", cf);
235  return kFALSE;
236  }
238  Error("ReadGroupConfig", "cannot read %s", cf);
239  return kFALSE;
240  }
241  // Ok
242  fGroupConfigFile = cf;
244  }
245 
246  if (gDebug > 0)
247  Info("ReadGroupConfig","reading group config from %s", cf);
248 
249  // Open the config file
250  std::ifstream in;
251  in.open(cf);
252  if (!in.is_open()) {
253  Error("ReadGroupConfig", "could not open config file %s", cf);
254  return kFALSE;
255  }
256 
257  // Container for the global common user
258  TString tmpCommonUser;
259 
260  // Go through
261  TString line;
262  while (in.good()) {
263  // Read new line
264  line.ReadLine(in);
265  // Explicitely skip comment lines
266  if (line[0] == '#') continue;
267  // Parse it
268  Ssiz_t from = 0;
269  TString key;
270  if (!line.Tokenize(key, from, " ")) // No token
271  continue;
272  // Parsing depends on the key
273  if (key == "property") {
274  // Read group
275  TString grp;
276  if (!line.Tokenize(grp, from, " ")) {// No token
277  if (gDebug > 0)
278  Info("ReadGroupConfig","incomplete line: '%s'", line.Data());
279  continue;
280  }
281  // Read type of property
282  TString type;
283  if (!line.Tokenize(type, from, " ")) // No token
284  continue;
285  if (type == "diskquota") {
286  // Read diskquota
287  TString sdq;
288  if (!line.Tokenize(sdq, from, " ")) // No token
289  continue;
290  // Enforce GigaBytes as default
291  if (sdq.IsDigit()) sdq += "G";
292  Long64_t quota = ToBytes(sdq);
293  if (quota > -1) {
294  fGroupQuota.Add(new TObjString(grp),
295  new TParameter<Long64_t> ("group quota", quota));
296  } else {
297  Warning("ReadGroupConfig",
298  "problems parsing string: wrong or unsupported suffix? %s",
299  sdq.Data());
300  }
301  } else if (type == "commonuser") {
302  // Read common user for this group
303  TString comusr;
304  if (!line.Tokenize(comusr, from, " ")) // No token
305  continue;
306 
307  }
308 
309  } else if (key == "dataset") {
310  // Read type
311  TString type;
312  if (!line.Tokenize(type, from, " ")) {// No token
313  if (gDebug > 0)
314  Info("ReadGroupConfig","incomplete line: '%s'", line.Data());
315  continue;
316  }
317  if (type == "commonuser") {
318  // Read global common user
319  TString comusr;
320  if (!line.Tokenize(comusr, from, " ")) // No token
321  continue;
322  fCommonUser = comusr;
323  } else if (type == "commongroup") {
324  // Read global common group
325  TString comgrp;
326  if (!line.Tokenize(comgrp, from, " ")) // No token
327  continue;
328  fCommonGroup = comgrp;
329  } else if (type == "diskquota") {
330  // Quota check switch
331  TString on;
332  if (!line.Tokenize(on, from, " ")) // No token
333  continue;
334  if (on == "on") {
336  } else if (on == "off") {
338  }
339  }
340 
341  } else if (key == "averagefilesize") {
342 
343  // Read average size
344  TString avgsize;
345  if (!line.Tokenize(avgsize, from, " ")) {// No token
346  if (gDebug > 0)
347  Info("ReadGroupConfig","incomplete line: '%s'", line.Data());
348  continue;
349  }
350  Long64_t avgsz = ToBytes(avgsize);
351  if (avgsz > -1) {
352  fAvgFileSize = avgsz;
353  } else {
354  Warning("ReadGroupConfig",
355  "problems parsing string: wrong or unsupported suffix? %s",
356  avgsize.Data());
357  }
358  } else if (key == "include") {
359 
360  // Read file to include
361  TString subfn;
362  if (!line.Tokenize(subfn, from, " ")) {// No token
363  if (gDebug > 0)
364  Info("ReadGroupConfig","incomplete line: '%s'", line.Data());
365  continue;
366  }
367  // The file must be readable
368  if (gSystem->AccessPathName(subfn, kReadPermission)) {
369  Error("ReadGroupConfig", "request to parse file '%s' which is not readable",
370  subfn.Data());
371  continue;
372  }
373  if (!ReadGroupConfig(subfn))
374  Error("ReadGroupConfig", "problems parsing include file '%s'", subfn.Data());
375  }
376  }
377  in.close();
378 
379  return kTRUE;
380 }
381 
382 ////////////////////////////////////////////////////////////////////////////////
383 /// Static utility function to get the number of bytes from a string
384 /// representation in the form "<digit><sfx>" with <sfx> = {"", "k", "M", "G",
385 /// "T", "P"} (case insensitive).
386 /// Returns -1 if the format is wrong.
387 
389 {
390  Long64_t lsize = -1;
391 
392  // Check if valid
393  if (!size || strlen(size) <= 0) return lsize;
394 
395  TString s(size);
396  // Determine factor
397  Long64_t fact = 1;
398  if (!s.IsDigit()) {
399  const char *unit[5] = { "k", "M", "G", "T", "P"};
400  fact = 1024;
401  Int_t jj = 0;
402  while (jj <= 4) {
403  if (s.EndsWith(unit[jj], TString::kIgnoreCase)) {
404  s.Remove(s.Length()-1);
405  break;
406  }
407  fact *= 1024;
408  jj++;
409  }
410  }
411  // Apply factor now
412  if (s.IsDigit())
413  lsize = s.Atoi() * fact;
414 
415  // Done
416  return lsize;
417 }
418 
419 ////////////////////////////////////////////////////////////////////////////////
420 /// Utility function used in various methods for user dataset upload.
421 
422 TFileCollection *TDataSetManager::GetDataSet(const char *, const char *)
423 {
424  AbstractMethod("GetDataSet");
425  return (TFileCollection *)0;
426 }
427 
428 ////////////////////////////////////////////////////////////////////////////////
429 /// Removes the indicated dataset
430 
432 {
433  AbstractMethod("RemoveDataSet");
434  return kFALSE;
435 }
436 
437 ////////////////////////////////////////////////////////////////////////////////
438 /// Checks if the indicated dataset exists
439 
441 {
442  AbstractMethod("ExistsDataSet");
443  return kFALSE;
444 }
445 
446 ////////////////////////////////////////////////////////////////////////////////
447 ///
448 /// Returns all datasets for the <group> and <user> specified by <uri>.
449 /// If <user> is 0, it returns all datasets for the given <group>.
450 /// If <group> is 0, it returns all datasets.
451 /// The returned TMap contains:
452 /// <group> --> <map of users> --> <map of datasets> --> <dataset> (TFileCollection)
453 ///
454 /// The unsigned int 'option' is forwarded to GetDataSet and BrowseDataSet.
455 /// Available options (to be .or.ed):
456 /// kShowDefault a default selection is shown that include the ones from
457 /// the current user, the ones from the group and the common ones
458 /// kPrint print the dataset content
459 /// kQuotaUpdate update quotas
460 /// kExport use export naming
461 ///
462 /// NB1: options "kPrint", "kQuotaUpdate" and "kExport" are mutually exclusive
463 /// NB2: for options "kPrint" and "kQuotaUpdate" return is null.
464 
466 {
467  AbstractMethod("GetDataSets");
468 
469  return (TMap *)0;
470 }
471 ////////////////////////////////////////////////////////////////////////////////
472 /// Scans the dataset indicated by 'uri' following the 'opts' directives
473 ///
474 /// The 'opts' string contains up to 4 directive fields separated by ':'
475 ///
476 /// 'selection' field :
477 /// A, allfiles: process all files
478 /// D, staged: process only staged (on Disk) files (if 'allfiles:' is not specified
479 /// the default is to process only files marked as non-staged)
480 /// 'pre-action field':
481 /// O, open: open the files marked as staged when processing only files
482 /// marked as non-staged
483 /// T, touch: open and touch the files marked as staged when processing
484 /// only files marked as non-staged
485 /// I, nostagedcheck: do not check the actual stage status on selected files
486 ///
487 /// 'process' field:
488 /// N, noaction: do nothing on the selected files
489 /// P, fullproc: open the selected files and extract the meta information
490 /// L, locateonly: only locate the selected files
491 /// S, stageonly: issue a stage request for the selected files not yet staged
492 ///
493 /// 'auxiliary' field
494 /// V, verbose: notify the actions
495 ///
496 /// Returns 0 on success, -1 if any failure occurs.
497 
498 Int_t TDataSetManager::ScanDataSet(const char *uri, const char *opts)
499 {
500  // Extract the directives
501  UInt_t o = 0;
502  if (opts && strlen(opts) > 0) {
503  // Selection options
504  if (strstr(opts, "allfiles:") || strchr(opts, 'A'))
505  o |= kAllFiles;
506  else if (strstr(opts, "staged:") || strchr(opts, 'D'))
507  o |= kStagedFiles;
508  // Pre-action options
509  if (strstr(opts, "open:") || strchr(opts, 'O'))
510  o |= kReopen;
511  if (strstr(opts, "touch:") || strchr(opts, 'T'))
512  o |= kTouch;
513  if (strstr(opts, "nostagedcheck:") || strchr(opts, 'I'))
514  o |= kNoStagedCheck;
515  // Process options
516  if (strstr(opts, "noaction:") || strchr(opts, 'N'))
517  o |= kNoAction;
518  if (strstr(opts, "locateonly:") || strchr(opts, 'L'))
519  o |= kLocateOnly;
520  if (strstr(opts, "stageonly:") || strchr(opts, 'S'))
521  o |= kStageOnly;
522  // Auxilliary options
523  if (strstr(opts, "verbose:") || strchr(opts, 'V'))
524  o |= kDebug;
525  } else {
526  // Default
527  o = kReopen | kDebug;
528  }
529 
530  // Run
531  return ScanDataSet(uri, o);
532 }
533 
534 ////////////////////////////////////////////////////////////////////////////////
535 /// Scans the dataset indicated by <uri> and returns the number of missing files.
536 /// Returns -1 if any failure occurs.
537 /// For more details, see documentation of
538 /// ScanDataSet(TFileCollection *dataset, const char *option)
539 
541 {
542  AbstractMethod("ScanDataSet");
543 
544  return -1;
545 }
546 
547 ////////////////////////////////////////////////////////////////////////////////
548 ///
549 /// Gets quota information from this dataset
550 
551 void TDataSetManager::GetQuota(const char *group, const char *user,
552  const char *dsName, TFileCollection *dataset)
553 {
554  if (gDebug > 0)
555  Info("GetQuota", "processing dataset %s %s %s", group, user, dsName);
556 
557  if (dataset->GetTotalSize() > 0) {
558  TParameter<Long64_t> *size =
559  dynamic_cast<TParameter<Long64_t>*> (fGroupUsed.GetValue(group));
560  if (!size) {
561  size = new TParameter<Long64_t> ("group used", 0);
562  fGroupUsed.Add(new TObjString(group), size);
563  }
564 
565  size->SetVal(size->GetVal() + dataset->GetTotalSize());
566 
567  TMap *userMap = dynamic_cast<TMap*> (fUserUsed.GetValue(group));
568  if (!userMap) {
569  userMap = new TMap;
570  fUserUsed.Add(new TObjString(group), userMap);
571  }
572 
573  size = dynamic_cast<TParameter<Long64_t>*> (userMap->GetValue(user));
574  if (!size) {
575  size = new TParameter<Long64_t> ("user used", 0);
576  userMap->Add(new TObjString(user), size);
577  }
578 
579  size->SetVal(size->GetVal() + dataset->GetTotalSize());
580  }
581 }
582 
583 ////////////////////////////////////////////////////////////////////////////////
584 /// Display quota information
585 
586 void TDataSetManager::ShowQuota(const char *opt)
587 {
588  UpdateUsedSpace();
589 
590  TMap *groupQuotaMap = GetGroupQuotaMap();
591  TMap *userUsedMap = GetUserUsedMap();
592  if (!groupQuotaMap || !userUsedMap)
593  return;
594 
595  Bool_t noInfo = kTRUE;
596  TIter iter(groupQuotaMap);
597  TObjString *group = 0;
598  while ((group = dynamic_cast<TObjString*> (iter.Next()))) {
599  noInfo = kFALSE;
600  Long64_t groupQuota = GetGroupQuota(group->String());
601  Long64_t groupUsed = GetGroupUsed(group->String());
602 
603  Printf(" +++ Group %s uses %.1f GB out of %.1f GB", group->String().Data(),
604  (Float_t) groupUsed / DSM_ONE_GB,
605  (Float_t) groupQuota / DSM_ONE_GB);
606 
607  // display also user information
608  if (opt && !TString(opt).Contains("U", TString::kIgnoreCase))
609  continue;
610 
611  TMap *userMap = dynamic_cast<TMap*> (userUsedMap->GetValue(group->String()));
612  if (!userMap)
613  continue;
614 
615  TIter iter2(userMap);
616  TObjString *user = 0;
617  while ((user = dynamic_cast<TObjString*> (iter2.Next()))) {
618  TParameter<Long64_t> *size2 =
619  dynamic_cast<TParameter<Long64_t>*> (userMap->GetValue(user->String().Data()));
620  if (!size2)
621  continue;
622 
623  Printf(" +++ User %s uses %.1f GB", user->String().Data(),
624  (Float_t) size2->GetVal() / DSM_ONE_GB);
625  }
626 
627  Printf("------------------------------------------------------");
628  }
629  // Check if something has been printed
630  if (noInfo) {
631  Printf(" +++ Quota check enabled but no quota info available +++ ");
632  }
633 }
634 
635 ////////////////////////////////////////////////////////////////////////////////
636 ///
637 /// Prints the quota
638 
640 {
641  Info("PrintUsedSpace", "listing used space");
642 
643  TIter iter(&fUserUsed);
644  TObjString *group = 0;
645  while ((group = dynamic_cast<TObjString*> (iter.Next()))) {
646  TMap *userMap = dynamic_cast<TMap*> (fUserUsed.GetValue(group->String()));
647 
648  TParameter<Long64_t> *size =
649  dynamic_cast<TParameter<Long64_t>*> (fGroupUsed.GetValue(group->String()));
650 
651  if (userMap && size) {
652  Printf("Group %s: %lld B = %.2f GB", group->String().Data(), size->GetVal(),
653  (Float_t) size->GetVal() / DSM_ONE_GB);
654 
655  TIter iter2(userMap);
656  TObjString *user = 0;
657  while ((user = dynamic_cast<TObjString*> (iter2.Next()))) {
658  TParameter<Long64_t> *size2 =
659  dynamic_cast<TParameter<Long64_t>*> (userMap->GetValue(user->String().Data()));
660  if (size2)
661  Printf(" User %s: %lld B = %.2f GB", user->String().Data(), size2->GetVal(),
662  (Float_t) size2->GetVal() / DSM_ONE_GB);
663  }
664 
665  Printf("------------------------------------------------------");
666  }
667  }
668 }
669 
670 ////////////////////////////////////////////////////////////////////////////////
671 ///
672 /// Log info to the monitoring server
673 
675 {
676  Info("MonitorUsedSpace", "sending used space to monitoring server");
677 
678  TIter iter(&fUserUsed);
679  TObjString *group = 0;
680  while ((group = dynamic_cast<TObjString*> (iter.Next()))) {
681  TMap *userMap = dynamic_cast<TMap*> (fUserUsed.GetValue(group->String()));
682  TParameter<Long64_t> *size =
683  dynamic_cast<TParameter<Long64_t>*> (fGroupUsed.GetValue(group->String()));
684 
685  if (!userMap || !size)
686  continue;
687 
688  TList *list = new TList;
689  list->SetOwner();
690  list->Add(new TParameter<Long64_t>("_TOTAL_", size->GetVal()));
691  Long64_t groupQuota = GetGroupQuota(group->String());
692  if (groupQuota != -1)
693  list->Add(new TParameter<Long64_t>("_QUOTA_", groupQuota));
694 
695  TIter iter2(userMap);
696  TObjString *user = 0;
697  while ((user = dynamic_cast<TObjString*> (iter2.Next()))) {
698  TParameter<Long64_t> *size2 =
699  dynamic_cast<TParameter<Long64_t>*> (userMap->GetValue(user->String().Data()));
700  if (!size2)
701  continue;
702  list->Add(new TParameter<Long64_t>(user->String().Data(), size2->GetVal()));
703  }
704 
705  if (!monitoring->SendParameters(list, group->String()))
706  Warning("MonitorUsedSpace", "problems sending monitoring parameters");
707  delete list;
708  }
709 }
710 
711 ////////////////////////////////////////////////////////////////////////////////
712 ///
713 /// Returns the used space of that group
714 
716 {
717  if (fgCommonDataSetTag == group)
718  group = fCommonGroup;
719 
720  TParameter<Long64_t> *size =
721  dynamic_cast<TParameter<Long64_t>*> (fGroupUsed.GetValue(group));
722  if (!size) {
723  if (gDebug > 0)
724  Info("GetGroupUsed", "group %s not found", group);
725  return 0;
726  }
727 
728  return size->GetVal();
729 }
730 
731 ////////////////////////////////////////////////////////////////////////////////
732 ///
733 /// returns the quota a group is allowed to have
734 
736 {
737  if (fgCommonDataSetTag == group)
738  group = fCommonGroup;
739 
741  dynamic_cast<TParameter<Long64_t>*> (fGroupQuota.GetValue(group));
742  if (!value) {
743  if (gDebug > 0)
744  Info("GetGroupQuota", "group %s not found", group);
745  return 0;
746  }
747  return value->GetVal();
748 }
749 
750 ////////////////////////////////////////////////////////////////////////////////
751 /// updates the used space maps
752 
754 {
755  AbstractMethod("UpdateUsedSpace");
756 }
757 
758 ////////////////////////////////////////////////////////////////////////////////
759 /// Register a dataset, performing quota checks, if needed.
760 /// Returns 0 on success, -1 on failure
761 
763  TFileCollection *, const char *)
764 {
765  AbstractMethod("RegisterDataSet");
766  return -1;
767 }
768 
769 ////////////////////////////////////////////////////////////////////////////////
770 /// Save into the <datasetdir>/dataset.list file the name of the last updated
771 /// or created or modified dataset
772 /// Returns 0 on success, -1 on error
773 
774 Int_t TDataSetManager::NotifyUpdate(const char * /*group*/,
775  const char * /*user*/,
776  const char * /*dspath*/,
777  Long_t /*mtime*/,
778  const char * /*checksum*/)
779 {
780  AbstractMethod("NotifyUpdate");
781  return -1;
782 }
783 
784 ////////////////////////////////////////////////////////////////////////////////
785 /// Clear cached information matching uri
786 
787 Int_t TDataSetManager::ClearCache(const char * /*uri*/)
788 {
789  AbstractMethod("ClearCache");
790  return -1;
791 }
792 
793 ////////////////////////////////////////////////////////////////////////////////
794 /// Show cached information matching uri
795 
796 Int_t TDataSetManager::ShowCache(const char * /*uri*/)
797 {
798  AbstractMethod("ShowCache");
799  return -1;
800 }
801 
802 ////////////////////////////////////////////////////////////////////////////////
803 /// Creates URI for the dataset manager in the form '[[/dsGroup/]dsUser/]dsName[#dsObjPath]'.
804 /// The optional dsObjPath can be in the form '[subdir/]objname'.
805 
806 TString TDataSetManager::CreateUri(const char *dsGroup, const char *dsUser,
807  const char *dsName, const char *dsObjPath)
808 {
809  TString uri;
810 
811  if (dsGroup && strlen(dsGroup) > 0) {
812  if (dsUser && strlen(dsUser) > 0) {
813  uri += Form("/%s/%s/", dsGroup, dsUser);
814  } else {
815  uri += Form("/%s/*/", dsGroup);
816  }
817  } else if (dsUser && strlen(dsUser) > 0) {
818  uri += Form("%s/", dsUser);
819  }
820  if (dsName && strlen(dsName) > 0)
821  uri += dsName;
822  if (dsObjPath && strlen(dsObjPath) > 0)
823  uri += Form("#%s", dsObjPath);
824 
825  // Done
826  return uri;
827 }
828 
829 ////////////////////////////////////////////////////////////////////////////////
830 /// Parses a (relative) URI that describes a DataSet on the cluster.
831 /// The input 'uri' should be in the form '[[/group/]user/]dsname[#[subdir/]objname]',
832 /// where 'objname' is the name of the object (e.g. the tree name) and the 'subdir'
833 /// is the directory in the file where it should be looked for.
834 /// After resolving against a base URI consisting of proof://masterhost/group/user/
835 /// - meaning masterhost, group and user of the current session -
836 /// the path is checked to contain exactly three elements separated by '/':
837 /// group/user/dsname
838 /// If wildcards, '*' is allowed in group and user and dsname is allowed to be empty.
839 /// If onlyCurrent, only group and user of current session are allowed.
840 /// Only non-null parameters are filled by this function.
841 /// Returns kTRUE in case of success.
842 
844  TString *dsGroup, TString *dsUser,
845  TString *dsName, TString *dsTree,
846  Bool_t onlyCurrent, Bool_t wildcards)
847 {
848  TString uristr(uri);
849 
850  // If URI contains fields in the form "Field=Value;" it is a virtual URI and
851  // should be treated differently
852  if ((uristr.Index('=') >= 0) && (uristr.Index(';') >= 0)) {
853 
854  // URI is composed of two parts: a name (dsName), and the tree after the
855  // pound sign
856 
857  Warning("ParseUri",
858  "Dataset URI looks like a virtual URI, treating it as such. "
859  "No group and user will be parsed!");
860 
861  TPMERegexp reVirtualUri("^([^#]+)(#(.*))?$");
862  Int_t nm = reVirtualUri.Match(uristr);
863 
864  if (nm >= 2) {
865  if (dsGroup) *dsGroup = "";
866  if (dsUser) *dsUser = "";
867  if (dsName) *dsName = reVirtualUri[1];
868  if (dsTree) {
869  if (nm == 4) *dsTree = reVirtualUri[3];
870  else *dsTree = "";
871  }
872  }
873  else return kFALSE; // should never happen!
874 
875  return kTRUE;
876  }
877 
878  // Append trailing slash if missing when wildcards are enabled
879  Int_t pc = 0;
880  if (wildcards && uristr.Length() > 0) {
881  pc = uristr.CountChar('/');
882  Bool_t endsl = uristr.EndsWith("/") ? kTRUE : kFALSE;
883  Bool_t beginsl = uristr.BeginsWith("/") ? kTRUE : kFALSE;
884  if (beginsl) {
885  if (pc == 1) uristr += "/*/";
886  if (pc == 2 && endsl) uristr += "*/";
887  if (pc == 2 && !endsl) uristr += "/";
888  }
889  }
890 
891  // Resolve given URI agains the base
892  TUri resolved = TUri::Transform(uristr, fBase);
893  if (resolved.HasQuery())
894  Info ("ParseUri", "URI query part <%s> ignored", resolved.GetQuery().Data());
895 
896  TString path(resolved.GetPath());
897  // Must be in the form /group/user/dsname
898  if ((pc = path.CountChar('/')) != 3) {
900  Error ("ParseUri", "illegal dataset path: '%s'", uri);
901  return kFALSE;
902  } else if (pc >= 0 && pc < 3) {
903  // Add missing slashes
904  TString sls("/");
905  if (pc == 2) {
906  sls = "/";
907  } else if (pc == 1) {
908  sls.Form("/%s/", fGroup.Data());
909  } else if (pc == 0) {
910  sls.Form("/%s/%s/", fGroup.Data(), fUser.Data());
911  }
912  path.Insert(0, sls);
913  }
914  }
915  if (gDebug > 1)
916  Info("ParseUri", "path: '%s'", path.Data());
917 
918  // Get individual values from tokens
919  Int_t from = 1;
920  TString group, user, name;
921  if (path.Tokenize(group, from, "/")) {
922  if (path.Tokenize(user, from, "/")) {
923  if (!path.Tokenize(name, from, "/"))
924  if (gDebug > 0) Info("ParseUri", "'name' missing");
925  } else {
926  if (gDebug > 0) Info("ParseUri", "'user' missing");
927  }
928  } else {
929  if (gDebug > 1) Info("ParseUri", "'group' missing");
930  }
931 
932  // The fragment may contain the subdir and the object name in the form '[subdir/]objname'
933  TString tree = resolved.GetFragment();
934  if (tree.EndsWith("/"))
935  tree.Remove(tree.Length()-1);
936 
937  if (gDebug > 1)
938  Info("ParseUri", "group: '%s', user: '%s', dsname:'%s', seg: '%s'",
939  group.Data(), user.Data(), name.Data(), tree.Data());
940 
941  // Check for unwanted use of wildcards
942  if ((user == "*" || group == "*") && !wildcards) {
943  Error ("ParseUri", "no wildcards allowed for user/group in this context (uri: '%s')", uri);
944  return kFALSE;
945  }
946 
947  // dsname may only be empty if wildcards expected
948  if (name.IsNull() && !wildcards) {
949  Error ("ParseUri", "DataSet name is empty");
950  return kFALSE;
951  }
952 
953  // Construct regexp whitelist for checking illegal characters in user/group
954  TPRegexp wcExp (wildcards ? "^(?:[A-Za-z0-9-*_.]*|[*])$" : "^[A-Za-z0-9-_.]*$");
955 
956  // Check for illegal characters in all components
957  if (!wcExp.Match(group)) {
958  Error("ParseUri", "illegal characters in group (uri: '%s', group: '%s')", uri, group.Data());
959  return kFALSE;
960  }
961 
962  if (!wcExp.Match(user)) {
963  Error("ParseUri", "illegal characters in user (uri: '%s', user: '%s')", uri, user.Data());
964  return kFALSE;
965  }
966 
967  // Construct regexp whitelist for checking illegal characters in name
968  if (!wcExp.Match(name)) {
969  Error("ParseUri", "illegal characters in name (uri: '%s', name: '%s')", uri, name.Data());
970  return kFALSE;
971  }
972 
973  if (tree.Contains(TRegexp("[^A-Za-z0-9-/_]"))) {
974  Error("ParseUri", "Illegal characters in subdir/object name (uri: '%s', obj: '%s')", uri, tree.Data());
975  return kFALSE;
976  }
977 
978  // Check user & group
979  if (onlyCurrent && (group.CompareTo(fGroup) || user.CompareTo(fUser))) {
980  Error("ParseUri", "only datasets from your group/user allowed");
981  return kFALSE;
982  }
983 
984  // fill parameters passed by reference, if defined
985  if (dsGroup)
986  *dsGroup = group;
987  if (dsUser)
988  *dsUser = user;
989  if (dsName)
990  *dsName = name;
991  if (dsTree)
992  *dsTree = tree;
993 
994  return kTRUE;
995 }
996 
997 ////////////////////////////////////////////////////////////////////////////////
998 /// Partition dataset 'ds' accordingly to the servers.
999 /// The returned TMap contains:
1000 /// <server> --> <subdataset> (TFileCollection)
1001 /// where <subdataset> is the subset of 'ds' on <server>
1002 /// The partitioning is done using all the URLs in the TFileInfo's, so the
1003 /// resulting datasets are not mutually exclusive.
1004 /// The string 'exclude' contains a comma-separated list of servers to exclude
1005 /// from the map.
1006 
1007 TMap *TDataSetManager::GetSubDataSets(const char *ds, const char *exclude)
1008 {
1009  TMap *map = (TMap *)0;
1010 
1011  if (!ds || strlen(ds) <= 0) {
1012  Info("GetDataSets", "dataset name undefined!");
1013  return map;
1014  }
1015 
1016  // Get the dataset
1017  TFileCollection *fc = GetDataSet(ds);
1018  if (!fc) {
1019  Info("GetDataSets", "could not retrieve the dataset '%s'", ds);
1020  return map;
1021  }
1022 
1023  // Get the subset
1024  if (!(map = fc->GetFilesPerServer(exclude))) {
1025  if (gDebug > 0)
1026  Info("GetDataSets", "could not get map for '%s'", ds);
1027  }
1028 
1029  // Cleanup
1030  delete fc;
1031 
1032  // Done
1033  return map;
1034 }
1035 
1036 ////////////////////////////////////////////////////////////////////////////////
1037 /// Formatted printout of the content of TFileCollection 'fc'.
1038 /// Options in the form
1039 /// popt = u * 10 + f
1040 /// f 0 => header only, 1 => header + files
1041 /// when printing files
1042 /// u 0 => print file name only, 1 => print full URL
1043 
1045 {
1046  if (!fc) return;
1047 
1048  Int_t f = popt%10;
1049  Int_t u = popt - 10 * f;
1050 
1051  Printf("+++");
1052  if (fc->GetTitle() && (strlen(fc->GetTitle()) > 0)) {
1053  Printf("+++ Dumping: %s: ", fc->GetTitle());
1054  } else {
1055  Printf("+++ Dumping: %s: ", fc->GetName());
1056  }
1057  Printf("%s", fc->ExportInfo("+++ Summary:", 1)->GetName());
1058  if (f == 1) {
1059  Printf("+++ Files:");
1060  Int_t nf = 0;
1061  TIter nxfi(fc->GetList());
1062  TFileInfo *fi = 0;
1063  while ((fi = (TFileInfo *)nxfi())) {
1064  if (u == 1)
1065  Printf("+++ %5d. %s", ++nf, fi->GetCurrentUrl()->GetUrl());
1066  else
1067  Printf("+++ %5d. %s", ++nf, fi->GetCurrentUrl()->GetFile());
1068  }
1069  }
1070  Printf("+++");
1071 }
1072 
1073 ////////////////////////////////////////////////////////////////////////////////
1074 /// Prints formatted information about the dataset 'uri'.
1075 /// The type and format of output is driven by 'opt':
1076 ///
1077 /// 1. opt = "server:srv1[,srv2[,srv3[,...]]]"
1078 /// Print info about the subsets of 'uri' on servers srv1, srv2, ...
1079 /// 2. opt = "servers[:exclude:srv1[,srv2[,srv3[,...]]]]"
1080 /// Print info about the subsets of 'uri' on all servers, except
1081 /// the ones in the exclude list srv1, srv2, ...
1082 /// 3. opt = <any>
1083 /// Print info about all datasets matching 'uri'
1084 ///
1085 /// If 'opt' contains 'full:' the list of files in the datasets are also printed.
1086 /// In case 3. this is enabled only if 'uri' matches a single dataset.
1087 ///
1088 /// In case 3, if 'opt' contains
1089 /// 'full:' the list of files in the datasets are also printed.
1090 /// 'forcescan:' the dataset are open to get the information; otherwise the
1091 /// pre-processed information is used.
1092 /// 'noheader:' the labelling header is not printed; usefull when to chain
1093 /// several printouts
1094 /// 'noupdate:' do not update the cache (which may be slow on very remote
1095 /// servers)
1096 /// 'refresh:' refresh the information (requires appropriate credentials;
1097 /// typically it can be done only for owned datasets)
1098 
1099 void TDataSetManager::ShowDataSets(const char *uri, const char *opt)
1100 {
1101  TFileCollection *fc = 0;
1102  TString o(opt);
1103  Int_t popt = 0;
1104  if (o.Contains("full:")) {
1105  o.ReplaceAll("full:","");
1106  popt = 1;
1107  }
1108  if (o.BeginsWith("server:")) {
1109  o.ReplaceAll("server:", "");
1110  TString srv;
1111  Int_t from = 0;
1112  while ((o.Tokenize(srv, from, ","))) {
1113  fc = GetDataSet(uri, srv.Data());
1114  PrintDataSet(fc, popt);
1115  delete fc;
1116  }
1117  } else if (o.BeginsWith("servers")) {
1118  o.ReplaceAll("servers", "");
1119  if (o.BeginsWith(":exclude:"))
1120  o.ReplaceAll(":exclude:", "");
1121  else
1122  o = "";
1123  TMap *dsmap = GetSubDataSets(uri, o.Data());
1124  if (dsmap) {
1125  TIter nxk(dsmap);
1126  TObject *k = 0;
1127  while ((k = nxk()) && (fc = (TFileCollection *) dsmap->GetValue(k))) {
1128  PrintDataSet(fc, popt);
1129  }
1130  delete dsmap;
1131  }
1132  } else {
1133  TString u(uri), grp, usr, dsn;
1134  // Support for "*" or "/*"
1135  if (u == "" || u == "*" || u == "/*" || u == "/*/" || u == "/*/*") u = "/*/*/";
1136  if (!ParseUri(u.Data(), &grp, &usr, &dsn, 0, kFALSE, kTRUE))
1137  Warning("ShowDataSets", "problems parsing URI '%s'", uri);
1138  // Scan the existing datasets and print the content
1140  if (o.Contains("forcescan:")) xopt |= (UInt_t)(TDataSetManager::kForceScan);
1141  if (o.Contains("noheader:")) xopt |= (UInt_t)(TDataSetManager::kNoHeaderPrint);
1142  if (o.Contains("noupdate:")) xopt |= (UInt_t)(TDataSetManager::kNoCacheUpdate);
1143  if (o.Contains("refresh:")) xopt |= (UInt_t)(TDataSetManager::kRefreshLs);
1144  if (!u.IsNull() && !u.Contains("*") && !grp.IsNull() && !usr.IsNull() && !dsn.IsNull()) {
1145  if (ExistsDataSet(uri)) {
1146  // Single dataset
1147  if (popt == 0) {
1148  // Quick listing
1149  GetDataSets(u.Data(), xopt);
1150  } else if ((fc = GetDataSet(uri))) {
1151  // Full print option
1152  PrintDataSet(fc, 10 + popt);
1153  delete fc;
1154  }
1155  return;
1156  }
1157  // Try all the directories
1158  TRegexp reg(grp, kTRUE), reu(usr, kTRUE);
1159  if (u.Index(reg) == kNPOS) grp = "*";
1160  if (u.Index(reu) == kNPOS) usr = "*";
1161  // Rebuild the uri
1162  u.Form("/%s/%s/%s", grp.Data(), usr.Data(), dsn.Data());
1163  }
1164  GetDataSets(u.Data(), xopt);
1165  }
1166 
1167  return;
1168 }
1169 
1170 ////////////////////////////////////////////////////////////////////////////////
1171 /// Go through the files in the specified dataset, selecting files according to
1172 /// 'fopt' and doing on these files the actions described by 'sopt'.
1173 /// If required, the information in 'dataset' is updated.
1174 ///
1175 /// The int fopt controls which files have to be processed (or added to the list
1176 /// if ropt is 1 - see below); 'fopt' is defined in term of csopt and fsopt:
1177 /// fopt = sign(fsopt) * csopt * 100 + fsopt
1178 /// where 'fsopt' controls the actual selection
1179 /// -1 all files in the dataset
1180 /// 0 process only files marked as 'non-staged'
1181 /// >=1 as 0 but files that are marked 'staged' are open
1182 /// >=2 as 1 but files that are marked 'staged' are touched
1183 /// 10 process only files marked as 'staged'; files marked as 'non-staged'
1184 /// are ignored
1185 /// and 'csopt' controls if an actual check on the staged status (via TFileStager) is done
1186 /// 0 check that the file is staged using TFileStager
1187 /// 1 do not hard check the staged status
1188 /// (example: use fopt = -101 to check the staged status of all the files, or fopt = 110
1189 /// to re-check the stage status of all the files marked as staged)
1190 ///
1191 /// If 'dbg' is true, some information about the ongoing operations is reguraly
1192 /// printed; this can be useful when processing very large datasets, an operation
1193 /// which can take a very long time.
1194 ///
1195 /// The int 'sopt' controls what is done on the selected files (this is effective only
1196 /// if ropt is 0 or 2 - see below):
1197 /// -1 no action (fopt = 2 and sopt = -1 touches all staged files)
1198 /// 0 do the full process: open the files and fill the meta-information
1199 /// in the TFileInfo object, including the end-point URL
1200 /// 1 only locate the files, by updating the end-point URL (uses TFileStager::Locate
1201 /// which is must faster of an TFile::Open)
1202 /// 2 issue a stage request on the files
1203 ///
1204 /// The int 'ropt' controls which actions are performed:
1205 /// 0 do the full process: get list of files to process and process them
1206 /// 1 get the list of files to be scanned and return it in flist
1207 /// 2 process the files in flist (according to sopt)
1208 /// When defined flist is under the responsability the caller.
1209 ///
1210 /// If avgsz > 0 it is used for the final update of the dataset global counters.
1211 ///
1212 /// If 'mss' is defined use it to initialize the stager (instead of the Url in the
1213 /// TFileInfo objects)
1214 ///
1215 /// If maxfiles > 0, select for processing a maximum of 'filesmax' files (but if fopt is 1 or 2
1216 /// all files marked as 'staged' are still open or touched)
1217 ///
1218 /// Return code
1219 /// 1 dataset was not changed
1220 /// 2 dataset was changed
1221 ///
1222 /// The number of touched, opened and disappeared files are returned in the respective
1223 /// variables, if these are defined.
1224 
1226  Int_t fopt, Int_t sopt, Int_t ropt, Bool_t dbg,
1227  Int_t *touched, Int_t *opened, Int_t *disappeared,
1228  TList *flist, Long64_t avgsz, const char *mss,
1229  Int_t maxfiles, const char *stageopts)
1230 {
1231  // Max number of files
1232  if (maxfiles > -1 && dbg)
1233  ::Info("TDataSetManager::ScanDataSet", "processing a maximum of %d files", maxfiles);
1234 
1235  // File selection, Reopen and Touch options
1236  Bool_t checkstg = (fopt >= 100 || fopt < -1) ? kFALSE : kTRUE;
1237 
1238  // File processing options
1239  Bool_t noaction = (sopt == -1) ? kTRUE : kFALSE;
1240  //Bool_t fullproc = (sopt == 0) ? kTRUE : kFALSE;
1241  Bool_t locateonly = (sopt == 1) ? kTRUE : kFALSE;
1242  Bool_t stageonly = (sopt == 2) ? kTRUE : kFALSE;
1243 
1244  // Run options
1245  Bool_t doall = (ropt == 0) ? kTRUE : kFALSE;
1246  Bool_t getlistonly = (ropt == 1) ? kTRUE : kFALSE;
1247  Bool_t scanlist = (ropt == 2) ? kTRUE : kFALSE;
1248 
1249  if (scanlist && !flist) {
1250  ::Error("TDataSetManager::ScanDataSet", "input list is mandatory for option 'scan file list'");
1251  return -1;
1252  }
1253 
1254  Int_t ftouched = 0;
1255  Int_t fopened = 0;
1256  Int_t fdisappeared = 0;
1257 
1258  Bool_t bchanged_ds = kFALSE;
1259 
1260  TList *newStagedFiles = 0;
1261  TFileInfo *fileInfo = 0;
1262  TFileStager *stager = 0;
1263  Bool_t createStager = kFALSE;
1264 
1265  if (doall || getlistonly) {
1266 
1267  // Point to the list
1268  newStagedFiles = (!doall && getlistonly && flist) ? flist : new TList;
1269  if (newStagedFiles != flist) newStagedFiles->SetOwner(kFALSE);
1270 
1271  stager = (mss && strlen(mss) > 0) ? TFileStager::Open(mss) : 0;
1272  createStager = (stager) ? kFALSE : kTRUE;
1273 
1274  Bool_t bchanged_fi = kFALSE;
1275  Bool_t btouched = kFALSE;
1276  Bool_t bdisappeared = kFALSE;
1277 
1278  // Check which files have been staged, this can be replaced by a bulk command,
1279  // once it exists in the xrdclient
1280  TIter iter(dataset->GetList());
1281  while ((fileInfo = (TFileInfo *) iter())) {
1282 
1283  // For real time monitoring
1285 
1286  bchanged_fi = kFALSE;
1287  btouched = kFALSE;
1288  bdisappeared = kFALSE;
1289  Bool_t newlystaged = CheckStagedStatus(fileInfo, fopt, maxfiles, newStagedFiles->GetEntries(),
1290  stager, createStager, dbg, bchanged_fi, btouched,
1291  bdisappeared);
1292 
1293  if (bchanged_fi) bchanged_ds = kTRUE;
1294  if (btouched) ftouched++;
1295  if (bdisappeared) fdisappeared++;
1296 
1297  // Notify
1298  if (dbg && (ftouched+fdisappeared) % 100 == 0)
1299  ::Info("TDataSetManager::ScanDataSet", "opening %d: file: %s",
1300  ftouched + fdisappeared, fileInfo->GetCurrentUrl()->GetUrl());
1301 
1302  // Register the newly staged file
1303  if (!noaction && newlystaged) newStagedFiles->Add(fileInfo);
1304  }
1305  SafeDelete(stager);
1306 
1307  // If required to only get the list we are done
1308  if (getlistonly) {
1309  if (dbg && newStagedFiles->GetEntries() > 0)
1310  ::Info("TDataSetManager::ScanDataSet", " %d files appear to be newly staged",
1311  newStagedFiles->GetEntries());
1312  if (!flist) SafeDelete(newStagedFiles);
1313  return ((bchanged_ds) ? 2 : 1);
1314  }
1315  }
1316 
1317  if (!noaction && (doall || scanlist)) {
1318 
1319  // Point to the list
1320  newStagedFiles = (!doall && scanlist && flist) ? flist : newStagedFiles;
1321  if (newStagedFiles != flist) newStagedFiles->SetOwner(kFALSE);
1322 
1323  // loop over now staged files
1324  if (dbg && newStagedFiles->GetEntries() > 0)
1325  ::Info("TDataSetManager::ScanDataSet", "opening %d files that appear to be newly staged",
1326  newStagedFiles->GetEntries());
1327 
1328  // If staging files, prepare the stager
1329  if (locateonly || stageonly) {
1330  stager = (mss && strlen(mss) > 0) ? TFileStager::Open(mss) : 0;
1331  createStager = (stager) ? kFALSE : kTRUE;
1332  }
1333 
1334  // Notify each 'fqnot' files (min 1, max 100)
1335  Int_t fqnot = (newStagedFiles->GetSize() > 10) ? newStagedFiles->GetSize() / 10 : 1;
1336  if (fqnot > 100) fqnot = 100;
1337  Int_t count = 0;
1338  Bool_t bchanged_fi = kFALSE;
1339  Bool_t bopened = kFALSE;
1340  TIter iter(newStagedFiles);
1341  while ((fileInfo = (TFileInfo *) iter())) {
1342 
1343  if (dbg && (count%fqnot == 0))
1344  ::Info("TDataSetManager::ScanDataSet", "processing %d.'new' file: %s",
1345  count, fileInfo->GetCurrentUrl()->GetUrl());
1346  count++;
1347 
1348  // For real time monitoring
1350  bchanged_fi = kFALSE;
1351  bopened = kFALSE;
1352 
1353  ProcessFile(fileInfo, sopt, checkstg, doall, stager, createStager,
1354  stageopts, dbg, bchanged_fi, bopened);
1355 
1356  bchanged_ds |= bchanged_fi;
1357  if (bopened) fopened++;
1358  }
1359  if (newStagedFiles != flist) SafeDelete(newStagedFiles);
1360 
1361  dataset->RemoveDuplicates();
1362  dataset->Update(avgsz);
1363  }
1364 
1365  Int_t result = (bchanged_ds) ? 2 : 1;
1366  if (result > 0 && dbg)
1367  ::Info("TDataSetManager::ScanDataSet", "%d files 'new'; %d files touched;"
1368  " %d files disappeared", fopened, ftouched, fdisappeared);
1369 
1370  // Fill outputs, if required
1371  if (touched) *touched = ftouched;
1372  if (opened) *opened = fopened;
1373  if (disappeared) *disappeared = fdisappeared;
1374 
1375  // For real time monitoring
1377 
1378  return result;
1379 }
1380 
1381 ////////////////////////////////////////////////////////////////////////////////
1382 /// Check stage status of the file described by "fileInfo".
1383 /// fopt is same as "fopt" in TDataSetManager::ScanDataSet, which is repeated below:
1384 /// The int fopt controls which files have to be processed (or added to the list
1385 /// if ropt is 1 - see below); 'fopt' is defined in term of csopt and fsopt:
1386 /// fopt = sign(fsopt) * csopt * 100 + fsopt
1387 /// where 'fsopt' controls the actual selection
1388 /// -1 all files in the dataset
1389 /// 0 process only files marked as 'non-staged'
1390 /// >=1 as 0 but files that are marked 'staged' are open
1391 /// >=2 as 1 but files that are marked 'staged' are touched
1392 /// 10 process only files marked as 'staged'; files marked as 'non-staged'
1393 /// are ignored
1394 /// and 'csopt' controls if an actual check on the staged status (via TFileStager) is done
1395 /// 0 check that the file is staged using TFileStager
1396 /// 1 do not hard check the staged status
1397 /// (example: use fopt = -101 to check the staged status of all the files, or fopt = 110
1398 /// to re-check the stage status of all the files marked as staged)
1399 ///
1400 /// If 'dbg' is true, some information about the ongoing operations is reguraly
1401 /// printed; this can be useful when processing very large datasets, an operation
1402 /// which can take a very long time.
1403 ///
1404 /// If maxfiles > 0, select for processing a maximum of 'filesmax' files (but if fopt is 1 or 2
1405 /// all files marked as 'staged' are still open or touched)
1406 ///
1407 /// Return code
1408 /// kTRUE the file appears newly staged
1409 /// kFALSE otherwise
1410 ///
1411 /// changed is true if the fileinfo is modified
1412 /// touched is true if the file is open and read
1413 /// disappeared is true if the file is marked staged but actually not staged
1414 
1416  Int_t newstagedfiles, TFileStager* stager,
1417  Bool_t createStager, Bool_t dbg, Bool_t& changed,
1418  Bool_t& touched, Bool_t& disappeared)
1419 {
1420  // File selection, Reopen and Touch options
1421  Bool_t allf = (fopt == -1) ? kTRUE : kFALSE;
1422  Bool_t checkstg = (fopt >= 100 || fopt < -1) ? kFALSE : kTRUE;
1423  if (fopt >= 0) fopt %= 100;
1424  Bool_t nonstgf = (fopt >= 0 && fopt < 10) ? kTRUE : kFALSE;
1425  Bool_t reopen = (fopt >= 1 && fopt < 10) ? kTRUE : kFALSE;
1426  Bool_t touch = (fopt >= 2 && fopt < 10) ? kTRUE : kFALSE;
1427  Bool_t stgf = (fopt == 10) ? kTRUE : kFALSE;
1428 
1429  changed = kFALSE;
1430  touched = kFALSE;
1431  disappeared = kFALSE;
1432 
1433  // Check which files have been staged, this can be replaced by a bulk command,
1434  // once it exists in the xrdclient
1435 
1436  if (!allf) {
1437 
1438  fileInfo->ResetUrl();
1439  if (!fileInfo->GetCurrentUrl()) {
1440  ::Error("TDataSetManager::CheckStagedStatus", "GetCurrentUrl() returned 0 for %s",
1441  fileInfo->GetFirstUrl()->GetUrl());
1442  return kFALSE;
1443  }
1444 
1445  if (nonstgf && fileInfo->TestBit(TFileInfo::kStaged)) {
1446 
1447  // Skip files flagged as corrupted
1448  if (fileInfo->TestBit(TFileInfo::kCorrupted)) return kFALSE;
1449 
1450  // Skip if we are not asked to re-open the staged files
1451  if (!reopen) return kFALSE;
1452 
1453  // Set the URL removing the anchor (e.g. #AliESDs.root) because IsStaged()
1454  // and TFile::Open() with filetype=raw do not accept anchors
1455  TUrl *curl = fileInfo->GetCurrentUrl();
1456  const char *furl = curl->GetUrl();
1457  TString urlmod;
1458  if (TDataSetManager::CheckDataSetSrvMaps(curl, urlmod) && !(urlmod.IsNull()))
1459  furl = urlmod.Data();
1460  TUrl url(furl);
1461  url.SetAnchor("");
1462 
1463  // Check if file is still available, if touch is set actually read from the file
1464  TString uopt(url.GetOptions());
1465  uopt += "filetype=raw&mxredir=2";
1466  url.SetOptions(uopt.Data());
1467  TFile *file = TFile::Open(url.GetUrl());
1468  if (file) {
1469  if (touch) {
1470  // Actually access the file
1471  char tmpChar = 0;
1472  if (file->ReadBuffer(&tmpChar, 1))
1473  ::Warning("TDataSetManager::CheckStagedStatus", "problems reading 1 byte from open file");
1474  // Count
1475  touched = kTRUE;
1476  }
1477  file->Close();
1478  delete file;
1479  } else {
1480  // File could not be opened, reset staged bit
1481  if (dbg) ::Info("TDataSetManager::CheckStagedStatus", "file %s disappeared", url.GetUrl());
1482  fileInfo->ResetBit(TFileInfo::kStaged);
1483  disappeared = kTRUE;
1484  changed = kTRUE;
1485 
1486  // Remove invalid URL, if other one left...
1487  if (fileInfo->GetNUrls() > 1)
1488  fileInfo->RemoveUrl(curl->GetUrl());
1489  }
1490  // Go to next
1491  return kFALSE;
1492  } else if (stgf && !(fileInfo->TestBit(TFileInfo::kStaged))) {
1493  // All staged files are processed: skip non staged
1494  return kFALSE;
1495  }
1496  }
1497 
1498  // Only open maximum number of 'new' files
1499  if (maxfiles > 0 && newstagedfiles >= maxfiles)
1500  return kFALSE;
1501 
1502  // Hard check of the staged status, if required
1503  if (checkstg) {
1504  // Set the URL removing the anchor (e.g. #AliESDs.root) because IsStaged()
1505  // and TFile::Open() with filetype=raw do not accept anchors
1506  TUrl *curl = fileInfo->GetCurrentUrl();
1507  const char *furl = curl->GetUrl();
1508  TString urlmod;
1509  Bool_t mapped = kFALSE;
1510  if (TDataSetManager::CheckDataSetSrvMaps(curl, urlmod) && !(urlmod.IsNull())) {
1511  furl = urlmod.Data();
1512  mapped = kTRUE;
1513  }
1514  TUrl url(furl);
1515  url.SetAnchor("");
1516 
1517  // Get the stager (either the global one or from the URL)
1518  stager = createStager ? TFileStager::Open(url.GetUrl()) : stager;
1519 
1520  Bool_t result = kFALSE;
1521  if (stager) {
1522  result = stager->IsStaged(url.GetUrl());
1523  if (gDebug > 0)
1524  ::Info("TDataSetManager::CheckStagedStatus", "IsStaged: %s: %d", url.GetUrl(), result);
1525  if (createStager)
1526  SafeDelete(stager);
1527  } else {
1528  ::Warning("TDataSetManager::CheckStagedStatus",
1529  "could not get stager instance for '%s'", url.GetUrl());
1530  }
1531 
1532  // Go to next in case of failure
1533  if (!result) {
1534  if (fileInfo->TestBit(TFileInfo::kStaged)) {
1535  // Reset the bit
1536  fileInfo->ResetBit(TFileInfo::kStaged);
1537  changed = kTRUE;
1538  }
1539  return kFALSE;
1540  } else {
1541  if (!(fileInfo->TestBit(TFileInfo::kStaged))) {
1542  // Set the bit
1543  fileInfo->SetBit(TFileInfo::kStaged);
1544  changed = kTRUE;
1545  }
1546  }
1547 
1548  // If the url was re-mapped add the new url in front of the list
1549  if (mapped) {
1550  url.SetOptions(curl->GetOptions());
1551  url.SetAnchor(curl->GetAnchor());
1552  fileInfo->AddUrl(url.GetUrl(), kTRUE);
1553  }
1554  }
1555  return kTRUE;
1556 }
1557 
1558 ////////////////////////////////////////////////////////////////////////////////
1559 /// Locate, stage, or fully validate file "fileInfo".
1560 
1561 void TDataSetManager::ProcessFile(TFileInfo *fileInfo, Int_t sopt, Bool_t checkstg, Bool_t doall,
1562  TFileStager* stager, Bool_t createStager, const char *stageopts,
1563  Bool_t dbg, Bool_t& changed, Bool_t& opened)
1564 {
1565  // File processing options
1566  //Bool_t noaction = (sopt == -1) ? kTRUE : kFALSE;
1567  Bool_t fullproc = (sopt == 0) ? kTRUE : kFALSE;
1568  Bool_t locateonly = (sopt == 1) ? kTRUE : kFALSE;
1569  Bool_t stageonly = (sopt == 2) ? kTRUE : kFALSE;
1570 
1571  changed = kFALSE;
1572  opened = kFALSE;
1573  Int_t rc = -1;
1574 
1575  // Set the URL removing the anchor (e.g. #AliESDs.root) because IsStaged()
1576  // and TFile::Open() with filetype=raw do not accept anchors
1577  TUrl *curl = fileInfo->GetCurrentUrl();
1578  const char *furl = curl->GetUrl();
1579  TString urlmod;
1580  //Bool_t mapped = kFALSE;
1581  if (TDataSetManager::CheckDataSetSrvMaps(curl, urlmod) && !(urlmod.IsNull())) {
1582  furl = urlmod.Data();
1583  //mapped = kTRUE;
1584  }
1585  TUrl url(furl);
1586  url.SetOptions("");
1587  url.SetAnchor("");
1588 
1589  if (createStager){
1590  if (!stager || (stager && !stager->Matches(url.GetUrl()))) {
1591  SafeDelete(stager);
1592  if (!(stager = TFileStager::Open(url.GetUrl())) || !(stager->IsValid())) {
1593  ::Error("TDataSetManager::ProcessFile",
1594  "could not get valid stager instance for '%s'", url.GetUrl());
1595  return;
1596  }
1597  }
1598  }
1599  // Locate the file, if just requested so
1600  if (locateonly) {
1601  TString eurl;
1602  if (stager && stager->Locate(url.GetUrl(), eurl) == 0) {
1603  TString opts(curl->GetOptions());
1604  TString anch(curl->GetAnchor());
1605  // Get the effective end-point Url
1606  curl->SetUrl(eurl);
1607  // Restore original options and anchor, if any
1608  curl->SetOptions(opts);
1609  curl->SetAnchor(anch);
1610  // Flag and count
1611  changed = kTRUE;
1612  opened = kTRUE;
1613  } else {
1614  // Failure
1615  ::Error("TDataSetManager::ProcessFile", "could not locate %s", url.GetUrl());
1616  }
1617 
1618  } else if (stageonly) {
1619  TString eurl;
1620  if (stager && !(stager->IsStaged(url.GetUrl()))) {
1621  if (!(stager->Stage(url.GetUrl(), stageopts))) {
1622  // Failure
1623  ::Error("TDataSetManager::ProcessFile",
1624  "problems issuing stage request for %s", url.GetUrl());
1625  }
1626  }
1627  } else if (fullproc) {
1628  TString eurl;
1629  // Full file validation
1630  rc = -2;
1631  Bool_t doscan = kTRUE;
1632  if (checkstg) {
1633  doscan = kFALSE;
1634  if ((doall && fileInfo->TestBit(TFileInfo::kStaged)) ||
1635  (stager && stager->IsStaged(url.GetUrl()))) doscan = kTRUE;
1636  }
1637  if (doscan) {
1638  if ((rc = TDataSetManager::ScanFile(fileInfo, dbg)) < -1) return;
1639  changed = kTRUE;
1640  } else if (stager) {
1641  ::Warning("TDataSetManager::ProcessFile",
1642  "required file '%s' does not look as being online (staged)", url.GetUrl());
1643  }
1644  if (rc < 0) return;
1645  // Count
1646  opened = kTRUE;
1647  }
1648  return;
1649 }
1650 
1651 ////////////////////////////////////////////////////////////////////////////////
1652 /// Open the file described by 'fileinfo' to extract the relevant meta-information.
1653 /// Return 0 if OK, -2 if the file cannot be open, -1 if it is corrupted
1654 
1656 {
1657  Int_t rc = -2;
1658  // We need an input
1659  if (!fileinfo) {
1660  ::Error("TDataSetManager::ScanFile", "undefined input (!)");
1661  return rc;
1662  }
1663 
1664  TUrl *url = fileinfo->GetCurrentUrl();
1665 
1666  TFile *file = 0;
1667  Bool_t anchor = kFALSE;
1668 
1669  // Get timeout settings (default none)
1670  Int_t timeout = gEnv->GetValue("DataSet.ScanFile.OpenTimeout", -1);
1671  TString fileopt;
1672  if (timeout > 0) fileopt.Form("TIMEOUT=%d", timeout);
1673 
1674  // To determine the size we have to open the file without the anchor
1675  // (otherwise we get the size of the contained file - in case of a zip archive)
1676  // We open in raw mode which makes sure that the opening succeeds, even if
1677  // the file is corrupted
1678  const char *furl = url->GetUrl();
1679  TString urlmod;
1680  if (TDataSetManager::CheckDataSetSrvMaps(url, urlmod) && !(urlmod.IsNull()))
1681  furl = urlmod.Data();
1682  if (strlen(url->GetAnchor()) > 0) {
1683  anchor = kTRUE;
1684  // We need a raw open firts to get the real size of the file
1685  TUrl urlNoAnchor(furl);
1686  urlNoAnchor.SetAnchor("");
1687  TString unaopts = urlNoAnchor.GetOptions();
1688  if (!unaopts.IsNull()) {
1689  unaopts += "&filetype=raw";
1690  } else {
1691  unaopts = "filetype=raw";
1692  }
1693  urlNoAnchor.SetOptions(unaopts);
1694  // Wait max 5 secs per file
1695  if (!(file = TFile::Open(urlNoAnchor.GetUrl(), fileopt))) return rc;
1696 
1697  // Save some relevant info
1698  if (file->GetSize() > 0) fileinfo->SetSize(file->GetSize());
1699  fileinfo->SetBit(TFileInfo::kStaged);
1700 
1701  fileinfo->SetUUID(file->GetUUID().AsString());
1702 
1703  // Add url of the disk server in front of the list
1704  if (file->GetEndpointUrl()) {
1705  // add endpoint url if it is not a local file
1706  TUrl eurl(*(file->GetEndpointUrl()));
1707 
1708  if (strcmp(eurl.GetProtocol(), "file") ||
1709  !strcmp(eurl.GetProtocol(), url->GetProtocol())) {
1710 
1711  eurl.SetOptions(url->GetOptions());
1712  eurl.SetAnchor(url->GetAnchor());
1713 
1714  // Fix the hostname
1715  if (!strcmp(eurl.GetHost(), "localhost") || !strcmp(eurl.GetHost(), "127.0.0.1") ||
1716  !strcmp(eurl.GetHost(), "localhost.localdomain")) {
1717  eurl.SetHost(TUrl(gSystem->HostName()).GetHostFQDN());
1718  }
1719  // Add only if different
1720  if (strcmp(eurl.GetUrl(), url->GetUrl()))
1721  fileinfo->AddUrl(eurl.GetUrl(), kTRUE);
1722 
1723  if (gDebug > 0) ::Info("TDataSetManager::ScanFile", "added URL %s", eurl.GetUrl());
1724  }
1725  } else {
1726  ::Warning("TDataSetManager::ScanFile", "end-point URL undefined for file %s", file->GetName());
1727  }
1728 
1729  file->Close();
1730  delete file;
1731  }
1732 
1733  // OK, set the relevant flags
1734  rc = -1;
1735 
1736  // Disable warnings when reading a tree without loading the corresponding library
1737  Int_t oldLevel = gErrorIgnoreLevel;
1739 
1740  // Wait max 5 secs per file
1741  if (!(file = TFile::Open(url->GetUrl(), fileopt))) {
1742  // If the file could be opened before, but fails now it is corrupt...
1743  if (dbg) ::Info("TDataSetManager::ScanFile", "marking %s as corrupt", url->GetUrl());
1744  fileinfo->SetBit(TFileInfo::kCorrupted);
1745  // Set back old warning level
1746  gErrorIgnoreLevel = oldLevel;
1747  return rc;
1748  } else if (!anchor) {
1749  // Do the relevant settings
1750  if (file->GetSize() > 0) fileinfo->SetSize(file->GetSize());
1751  fileinfo->SetBit(TFileInfo::kStaged);
1752 
1753  // Add url of the disk server in front of the list if it is not a local file
1754  TUrl eurl(*(file->GetEndpointUrl()));
1755 
1756  if (strcmp(eurl.GetProtocol(), "file") ||
1757  !strcmp(eurl.GetProtocol(), url->GetProtocol())) {
1758 
1759  eurl.SetOptions(url->GetOptions());
1760  eurl.SetAnchor(url->GetAnchor());
1761 
1762  // Fix the hostname
1763  if (!strcmp(eurl.GetHost(), "localhost") || !strcmp(eurl.GetHost(), "127.0.0.1") ||
1764  !strcmp(eurl.GetHost(), "localhost.localdomain")) {
1765  eurl.SetHost(TUrl(gSystem->HostName()).GetHostFQDN());
1766  }
1767  // Add only if different
1768  if (strcmp(eurl.GetUrl(), url->GetUrl()))
1769  fileinfo->AddUrl(eurl.GetUrl(), kTRUE);
1770 
1771  if (gDebug > 0) ::Info("TDataSetManager::ScanFile", "added URL %s", eurl.GetUrl());
1772  }
1773  fileinfo->SetUUID(file->GetUUID().AsString());
1774  }
1775  rc = 0;
1776 
1777  // Loop over all entries and create/update corresponding metadata.
1778  // TODO If we cannot read some of the trees, is the file corrupted as well?
1779  if ((rc = TDataSetManager::FillMetaData(fileinfo, file, "/")) != 0) {
1780  ::Error("TDataSetManager::ScanFile",
1781  "problems processing the directory tree in looking for metainfo");
1782  fileinfo->SetBit(TFileInfo::kCorrupted);
1783  rc = -1;
1784  }
1785  // Set back old warning level
1786  gErrorIgnoreLevel = oldLevel;
1787 
1788  file->Close();
1789  delete file;
1790 
1791  // Done
1792  return rc;
1793 }
1794 
1795 ////////////////////////////////////////////////////////////////////////////////
1796 /// Navigate the directory 'd' (and its subdirectories) looking for TTree objects.
1797 /// Fill in the relevant metadata information in 'fi'. The name of the TFileInfoMeta
1798 /// metadata entry will be "/dir1/dir2/.../tree_name".
1799 /// Return 0 on success, -1 if any problem happens (object found in keys cannot be read,
1800 /// for example)
1801 
1803 {
1804  // Check inputs
1805  if (!fi || !d || !rdir) {
1806  ::Error("TDataSetManager::FillMetaData",
1807  "some inputs are invalid (fi:%p,d:%p,r:%s)", fi, d, rdir);
1808  return -1;
1809  }
1810 
1811  if (d->GetListOfKeys()) {
1812  TIter nxk(d->GetListOfKeys());
1813  TKey *k = 0;
1814  while ((k = dynamic_cast<TKey *> (nxk()))) {
1815 
1816  if (TClass::GetClass(k->GetClassName())->InheritsFrom(TDirectory::Class())) {
1817  // Get the directory
1818  TDirectory *sd = (TDirectory *) d->Get(k->GetName());
1819  if (!sd) {
1820  ::Error("TDataSetManager::FillMetaData", "cannot get sub-directory '%s'", k->GetName());
1821  return -1;
1822  }
1823  if (TDataSetManager::FillMetaData(fi, sd, TString::Format("%s%s/", rdir, k->GetName())) != 0) {
1824  ::Error("TDataSetManager::FillMetaData", "problems processing sub-directory '%s'", k->GetName());
1825  return -1;
1826  }
1827 
1828  } else {
1829  // We process only trees
1830  if (!TClass::GetClass(k->GetClassName())->InheritsFrom(TTree::Class())) continue;
1831 
1832  TString ks;
1833  ks.Form("%s%s", rdir, k->GetName());
1834 
1835  TFileInfoMeta *md = fi->GetMetaData(ks);
1836  if (!md) {
1837  // Create it
1838  md = new TFileInfoMeta(ks, k->GetClassName());
1839  fi->AddMetaData(md);
1840  if (gDebug > 0)
1841  ::Info("TDataSetManager::FillMetaData", "created meta data for tree %s", ks.Data());
1842  }
1843  // Fill values
1844  TTree *t = dynamic_cast<TTree *> (d->Get(k->GetName()));
1845  if (t) {
1846  if (t->GetEntries() >= 0) {
1847  md->SetEntries(t->GetEntries());
1848  if (t->GetTotBytes() >= 0)
1849  md->SetTotBytes(t->GetTotBytes());
1850  if (t->GetZipBytes() >= 0)
1851  md->SetZipBytes(t->GetZipBytes());
1852  }
1853  } else {
1854  ::Error("TDataSetManager::FillMetaData", "could not get tree '%s'", k->GetName());
1855  return -1;
1856  }
1857  }
1858  }
1859  }
1860  // Done
1861  return 0;
1862 }
1863 
1864 ////////////////////////////////////////////////////////////////////////////////
1865 /// Create a server mapping list from the content of 'srvmaps'
1866 /// Return the list (owned by the caller) or 0 if no valid info could be found)
1867 
1869 {
1870  TList *srvmapslist = 0;
1871  if (srvmaps.IsNull()) {
1872  ::Warning("TDataSetManager::ParseDataSetSrvMaps",
1873  "called with an empty string! - nothing to do");
1874  return srvmapslist;
1875  }
1876  TString srvmap, sf, st;
1877  Int_t from = 0, from1 = 0;
1878  while (srvmaps.Tokenize(srvmap, from, " ")) {
1879  sf = ""; st = "";
1880  if (srvmap.Contains("|")) {
1881  from1 = 0;
1882  if (srvmap.Tokenize(sf, from1, "|"))
1883  if (srvmap.Tokenize(st, from1, "|")) { }
1884  } else {
1885  st = srvmap;
1886  }
1887  if (st.IsNull()) {
1888  ::Warning("TDataSetManager::ParseDataSetSrvMaps",
1889  "parsing DataSet.SrvMaps: target must be defined"
1890  " (token: %s) - ignoring", srvmap.Data());
1891  continue;
1892  } else if (!(st.EndsWith("/"))) {
1893  st += "/";
1894  }
1895  // TUrl if wildcards or TObjString
1896  TString sp;
1897  TUrl *u = 0;
1898  if (!(sf.IsNull()) && sf.Contains("*")) {
1899  u = new TUrl(sf);
1900  if (!(sf.BeginsWith(u->GetProtocol()))) u->SetProtocol("root");
1901  sp.Form(":%d", u->GetPort());
1902  if (!(sf.Contains(sp))) u->SetPort(1094);
1903  if (!TString(u->GetHost()).Contains("*")) SafeDelete(u);
1904  }
1905  if (!srvmapslist) srvmapslist = new TList;
1906  if (u) {
1907  srvmapslist->Add(new TPair(u, new TObjString(st)));
1908  } else {
1909  srvmapslist->Add(new TPair(new TObjString(sf), new TObjString(st)));
1910  }
1911  }
1912  // Done
1913  if (srvmapslist) srvmapslist->SetOwner(kTRUE);
1914  return srvmapslist;
1915 }
1916 
1917 ////////////////////////////////////////////////////////////////////////////////
1918 /// Static getter for server mapping list
1919 
1921 {
1922  return fgDataSetSrvMaps;
1923 }
1924 
1925 ////////////////////////////////////////////////////////////////////////////////
1926 /// Check if the dataset server mappings apply to the url defined by 'furl'.
1927 /// Use srvmaplist if defined, else use the default list.
1928 /// If yes, resolve the mapping into file1 and return kTRUE.
1929 /// Otherwise return kFALSE.
1930 
1932 {
1933  Bool_t replaced = kFALSE;
1934  if (!furl) return replaced;
1935 
1936  const char *file = furl->GetUrl();
1937  TList *mlist = (srvmaplist) ? srvmaplist : fgDataSetSrvMaps;
1938  if (mlist && mlist->GetSize() > 0) {
1939  TIter nxm(mlist);
1940  TPair *pr = 0;
1941  while ((pr = (TPair *) nxm())) {
1942  Bool_t replace = kFALSE;
1943  // If TUrl apply reg exp on host
1944  TUrl *u = dynamic_cast<TUrl *>(pr->Key());
1945  if (u) {
1946  if (!strcmp(u->GetProtocol(), furl->GetProtocol())) {
1947  Ssiz_t len;
1948  if (!strcmp(u->GetProtocol(), "file")) {
1949  TRegexp re(u->GetFileAndOptions(), kTRUE);
1950  if (re.Index(furl->GetFileAndOptions(), &len) == 0) replace = kTRUE;
1951  } else {
1952  if (u->GetPort() == furl->GetPort()) {
1953  TRegexp re(u->GetHost(), kTRUE);
1954  if (re.Index(furl->GetHost(), &len) == 0) replace = kTRUE;
1955  }
1956  }
1957  }
1958  } else {
1959  TObjString *os = dynamic_cast<TObjString *>(pr->Key());
1960  if (os) {
1961  if (os->GetString().IsNull() ||
1962  !strncmp(file, os->GetName(), os->GetString().Length())) replace = kTRUE;
1963  }
1964  }
1965  if (replace) {
1966  TObjString *ost = dynamic_cast<TObjString *>(pr->Value());
1967  if (ost) {
1968  file1.Form("%s%s", ost->GetName(), furl->GetFileAndOptions());
1969  replaced = kTRUE;
1970  break;
1971  }
1972  }
1973  }
1974  }
1975  // Done
1976  return replaced;
1977 }
1978 
1979 ////////////////////////////////////////////////////////////////////////////////
1980 /// Update scan counters
1981 
1983 {
1984  fNTouchedFiles = (t > -1) ? t : fNTouchedFiles;
1985  fNOpenedFiles = (o > -1) ? o : fNOpenedFiles;
1986  fNDisappearedFiles = (d > -1) ? d : fNDisappearedFiles;
1987 }
const char * GetHost() const
Definition: TUrl.h:76
const char * GetName() const
Returns name of object.
Definition: TObjString.h:42
static Bool_t CheckDataSetSrvMaps(TUrl *furl, TString &fn, TList *srvmaplist=0)
Check if the dataset server mappings apply to the url defined by 'furl'.
virtual Bool_t IsStaged(const char *)
Just check if the file exists locally.
virtual Int_t GetEntries() const
Definition: TCollection.h:92
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:52
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1213
virtual Long64_t GetSize() const
Returns the current file size.
Definition: TFile.cxx:1279
void SetPort(Int_t port)
Definition: TUrl.h:97
virtual Int_t ClearCache(const char *uri)
Clear cached information matching uri.
long long Long64_t
Definition: RtypesCore.h:69
void SetUUID(const char *uuid)
Set the UUID to the value associated to the string 'uuid'.
Definition: TFileInfo.cxx:235
R__EXTERN Int_t gErrorIgnoreLevel
Definition: TError.h:107
static TList * fgDataSetSrvMaps
virtual Bool_t InheritsFrom(const char *classname) const
Returns kTRUE if object inherits from class "classname".
Definition: TObject.cxx:487
virtual TList * GetListOfKeys() const
Definition: TDirectory.h:158
void SetProtocol(const char *proto, Bool_t setDefaultPort=kFALSE)
Set protocol and, optionally, change the port accordingly.
Definition: TUrl.cxx:518
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
Definition: TDirectory.cxx:727
virtual void ParseInitOpts(const char *opts)
Parse the opts string and set the init bits accordingly Available options: Cq: set kCheckQuota Ar: se...
const TString GetFragment() const
Definition: TUri.h:93
TString fGroupConfigFile
Ssiz_t Length() const
Definition: TString.h:390
TLine * line
Collectable string class.
Definition: TObjString.h:32
float Float_t
Definition: RtypesCore.h:53
This class represents a WWW compatible URL.
Definition: TUrl.h:41
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
Bool_t RemoveUrl(const char *url)
Remove an URL. Returns kTRUE if successful, kFALSE otherwise.
Definition: TFileInfo.cxx:318
int GetPathInfo(const char *path, Long_t *id, Long_t *size, Long_t *flags, Long_t *modtime)
Get info about a file: id, size, flags, modification time.
Definition: TSystem.cxx:1311
std::istream & ReadLine(std::istream &str, Bool_t skipWhite=kTRUE)
Read a line from stream up to newline, skipping any whitespace.
Definition: Stringio.cxx:65
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
const char * GetProtocol() const
Definition: TUrl.h:73
virtual Bool_t RemoveDataSet(const char *uri)
Removes the indicated dataset.
void SetUrl(const char *url, Bool_t defaultIsFile=kFALSE)
Parse url character string and split in its different subcomponents.
Definition: TUrl.cxx:108
void SetEntries(Long64_t entries)
Definition: TFileInfo.h:158
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:892
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:45
virtual Bool_t ReadBuffer(char *buf, Int_t len)
Read a buffer from the file.
Definition: TFile.cxx:1596
ClassImp(TDataSetManager) TDataSetManager
Main constructor.
virtual TFileCollection * GetDataSet(const char *uri, const char *server=0)
Utility function used in various methods for user dataset upload.
Regular expression class.
Definition: TRegexp.h:35
void Add(TObject *obj)
This function may not be used (but we need to provide it since it is a pure virtual in TCollection)...
Definition: TMap.cxx:53
void SetScanCounters(Int_t t=-1, Int_t o=-1, Int_t d=-1)
Update scan counters.
virtual ~TDataSetManager()
Destructor.
void SetVal(const AParamType &val)
Definition: TParameter.h:79
virtual const TUrl * GetEndpointUrl() const
Definition: TFile.h:195
Basic string class.
Definition: TString.h:137
static Long64_t ToBytes(const char *size=0)
Static utility function to get the number of bytes from a string representation in the form "<digit><s...
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
virtual Long64_t GetZipBytes() const
Definition: TTree.h:463
virtual void ShowDataSets(const char *uri="*", const char *opt="")
Prints formatted information about the dataset 'uri'.
This class represents a RFC 3986 compatible URI.
Definition: TUri.h:39
Long_t fMtime
Definition: TSystem.h:142
static TString CreateUri(const char *dsGroup=0, const char *dsUser=0, const char *dsName=0, const char *dsTree=0)
Creates URI for the dataset manager in the form '[[/dsGroup/]dsUser/]dsName[#dsObjPath]'. The optional dsObjPath can be in the form '[subdir/]objname'.
virtual TMap * GetGroupQuotaMap()
Bool_t AddUrl(const char *url, Bool_t infront=kFALSE)
Add a new URL.
Definition: TFileInfo.cxx:293
static void ProcessFile(TFileInfo *fileInfo, Int_t sopt, Bool_t checkstg, Bool_t doall, TFileStager *stager, Bool_t createStager, const char *stageopts, Bool_t dbg, Bool_t &changed, Bool_t &opened)
Locate, stage, or fully validate file "fileInfo".
const char * GetOptions() const
Definition: TUrl.h:80
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:558
TUrl * GetFirstUrl() const
Definition: TFileInfo.h:83
TFile * f
void SetBit(UInt_t f, Bool_t set)
Set or unset the user status bits as specified in f.
Definition: TObject.cxx:732
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3851
const char * Data() const
Definition: TString.h:349
virtual void MonitorUsedSpace(TVirtualMonitoringWriter *monitoring)
Log info to the monitoring server.
static struct mg_connection * fc(struct mg_context *ctx)
Definition: civetweb.c:839
Int_t Update(Long64_t avgsize=-1)
Update accumulated information about the elements of the collection (e.g.
#define SafeDelete(p)
Definition: RConfig.h:436
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2321
TBonjourRegistrar * reg
Definition: hserv2bonj.C:28
Bool_t AddMetaData(TObject *meta)
Adds a meta data object to the file info object.
Definition: TFileInfo.cxx:382
Int_t GetNUrls() const
Definition: TFileInfo.h:86
virtual void ShowQuota(const char *opt)
Display quota information.
void Class()
Definition: Class.C:29
void SetSize(Long64_t size)
Definition: TFileInfo.h:97
virtual Bool_t IsValid() const
Definition: TFileStager.h:54
virtual Int_t RegisterDataSet(const char *uri, TFileCollection *dataSet, const char *opt)
Register a dataset, performing quota checks, if needed.
int d
Definition: tornado.py:11
Long_t fMTimeGroupConfig
void DeleteAll()
Remove all (key,value) pairs from the map AND delete the keys AND values when they are allocated on t...
Definition: TMap.cxx:167
std::map< std::string, std::string >::const_iterator iter
Definition: TAlienJob.cxx:54
virtual Long64_t GetGroupQuota(const char *group)
Returns the quota a group is allowed to have.
static TList * GetDataSetSrvMaps()
Static getter for server mapping list.
virtual const char * Getenv(const char *env)
Get environment variable.
Definition: TSystem.cxx:1575
TObject * GetValue(const char *keyname) const
Returns a pointer to the value associated with keyname as name of the key.
Definition: TMap.cxx:235
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition: TSystem.cxx:1511
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1951
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:30
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:918
static Bool_t CheckStagedStatus(TFileInfo *fileInfo, Int_t fopt, Int_t maxfiles, Int_t newstagedfiles, TFileStager *stager, Bool_t createStager, Bool_t dbg, Bool_t &changed, Bool_t &touched, Bool_t &disappeared)
Check stage status of the file described by "fileInfo".
A doubly linked list.
Definition: TList.h:47
void PrintUsedSpace()
Prints the quota.
Int_t GetPort() const
Definition: TUrl.h:87
virtual Long64_t GetTotBytes() const
Definition: TTree.h:435
const TString GetPath() const
Definition: TUri.h:91
virtual void UpdateUsedSpace()
Updates the used space maps.
virtual TMap * GetSubDataSets(const char *uri, const char *excludeservers)
Partition dataset 'ds' according to the servers.
TThread * t[5]
Definition: threadsh1.C:13
TString fUser
Definition: TSystem.h:152
TObjString * ExportInfo(const char *name=0, Int_t popt=0)
Export the relevant info as a string; use 'name' as collection name, if defined, else use GetName()...
static Int_t FillMetaData(TFileInfo *fi, TDirectory *d, const char *rdir="/")
Navigate the directory 'd' (and its subdirectories) looking for TTree objects.
TString GetString() const
Definition: TObjString.h:50
TFileInfoMeta * GetMetaData(const char *meta=0) const
Get meta data object with specified name.
Definition: TFileInfo.cxx:422
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
Definition: TString.cxx:2207
R__EXTERN TSystem * gSystem
Definition: TSystem.h:545
Int_t RemoveDuplicates()
Remove duplicates based on the UUID, typically after a verification.
THashList * GetList()
Bool_t ReadGroupConfig(const char *cf=0)
Read group config file 'cf'.
virtual TMap * GetUserUsedMap()
virtual Int_t GetValue(const char *name, Int_t dflt)
Returns the integer value for a resource.
Definition: TEnv.cxx:494
virtual Bool_t ExistsDataSet(const char *uri)
Checks if the indicated dataset exists.
Bool_t ParseUri(const char *uri, TString *dsGroup=0, TString *dsUser=0, TString *dsName=0, TString *dsTree=0, Bool_t onlyCurrent=kFALSE, Bool_t wildcards=kFALSE)
Parses a (relative) URI that describes a DataSet on the cluster.
TObject * Next()
Definition: TCollection.h:158
virtual Int_t ShowCache(const char *uri)
Show cached information matching uri.
TObject * Value() const
Definition: TMap.h:125
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2308
unsigned int UInt_t
Definition: RtypesCore.h:42
Bool_t TestBit(UInt_t f) const
Definition: TObject.h:173
Int_t ScanDataSet(const char *uri, const char *opt)
Scans the dataset indicated by 'uri' following the 'opts' directives.
char * Form(const char *fmt,...)
const char * GetFileAndOptions() const
Return the file and its options (the string specified behind the ?).
Definition: TUrl.cxx:499
virtual Bool_t SendParameters(TList *, const char *=0)
const char * AsString() const
Return UUID as string. Copy string immediately since it will be reused.
Definition: TUUID.cxx:536
void AbstractMethod(const char *method) const
Use this method to implement an "abstract" method that you don't want to leave purely abstract...
Definition: TObject.cxx:960
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
const TString GetQuery() const
Definition: TUri.h:92
static TUri Transform(const TUri &reference, const TUri &base)
Transform a URI reference into its target URI using given a base URI.
Definition: TUri.cxx:1121
const char * GetAnchor() const
Definition: TUrl.h:79
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
Definition: TString.cxx:430
Long64_t fAvgFileSize
Bool_t IsNull() const
Definition: TString.h:387
void Warning(const char *location, const char *msgfmt,...)
TString & String()
Definition: TObjString.h:52
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2227
#define Printf
Definition: TGeoToOCC.h:18
static Int_t ScanFile(TFileInfo *fileinfo, Bool_t notify)
Open the file described by 'fileinfo' to extract the relevant meta-information.
Long64_t GetTotalSize() const
const char * GetUrl(Bool_t withDeflt=kFALSE) const
Return full URL.
Definition: TUrl.cxx:385
void GetQuota(const char *group, const char *user, const char *dsName, TFileCollection *dataset)
Gets quota information from this dataset.
virtual Bool_t Stage(const char *, Option_t *=0)
Definition: TFileStager.h:51
void SetHost(const char *host)
Definition: TUrl.h:93
TString & Remove(Ssiz_t pos)
Definition: TString.h:616
long Long_t
Definition: RtypesCore.h:50
int Ssiz_t
Definition: RtypesCore.h:63
Class used by TMap to store (key,value) pairs.
Definition: TMap.h:106
#define DSM_ONE_GB
void SetAnchor(const char *anchor)
Definition: TUrl.h:95
tuple tree
Definition: tree.py:24
virtual Int_t GetSize() const
Definition: TCollection.h:95
tuple file
Definition: fildir.py:20
static TString fgCommonDataSetTag
virtual const char * HostName()
Return the system's host name.
Definition: TSystem.cxx:307
TObject * Key() const
Definition: TMap.h:124
Describe directory structure in memory.
Definition: TDirectory.h:44
TMap implements an associative array of (key,value) pairs using a THashTable for efficient retrieval ...
Definition: TMap.h:44
int type
Definition: TGX11.cxx:120
R__EXTERN TEnv * gEnv
Definition: TEnv.h:174
static TFileStager * Open(const char *stager)
Open a stager, after having loaded the relevant plug-in.
virtual void DispatchOneEvent(Bool_t pendingOnly=kFALSE)
Dispatch a single event.
Definition: TSystem.cxx:433
virtual Int_t NotifyUpdate(const char *group=0, const char *user=0, const char *dspath=0, Long_t mtime=0, const char *checksum=0)
Save into the <datasetdir>/dataset.list file the name of the last updated or created or modified data...
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2801
void ResetUrl()
Definition: TFileInfo.h:80
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Definition: TPRegexp.cxx:704
#define name(a, b)
Definition: linkTestLib0.cpp:5
Bool_t HasQuery() const
Definition: TUri.h:102
Mother of all ROOT objects.
Definition: TObject.h:58
Bool_t IsDigit() const
Returns true if all characters in string are digits (0-9) or white spaces, i.e.
Definition: TString.cxx:1793
void SetTotBytes(Long64_t tot)
Definition: TFileInfo.h:161
TUrl * GetCurrentUrl() const
Return the current url.
Definition: TFileInfo.cxx:246
virtual void Add(TObject *obj)
Definition: TList.h:81
const Ssiz_t kNPOS
Definition: Rtypes.h:115
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Definition: TPRegexp.h:103
Class that contains a list of TFileInfo's and accumulated meta data information about its entries...
void PrintDataSet(TFileCollection *fc, Int_t popt=0)
Formatted printout of the content of TFileCollection 'fc'.
void SetZipBytes(Long64_t zip)
Definition: TFileInfo.h:162
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
void SetOptions(const char *opt)
Definition: TUrl.h:96
const Int_t kError
Definition: TError.h:41
static TList * ParseDataSetSrvMaps(const TString &srvmaps)
Create a server mapping list from the content of 'srvmaps' Return the list (owned by the caller) or 0...
R__EXTERN Int_t gDebug
Definition: Rtypes.h:128
virtual Bool_t Matches(const char *s)
Definition: TFileStager.h:50
virtual Long64_t GetEntries() const
Definition: TTree.h:386
A TTree object has a header with a name and a title.
Definition: TTree.h:98
double result[121]
Class describing a generic file including meta information.
Definition: TFileInfo.h:50
void ResetBit(UInt_t f)
Definition: TObject.h:172
const AParamType & GetVal() const
Definition: TParameter.h:77
virtual Int_t Locate(const char *u, TString &f)
Just check if the file exists locally.
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:582
const Bool_t kTRUE
Definition: Rtypes.h:91
Int_t Match(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10, TArrayI *pos=0)
The number of matches is returned, this equals the full match + sub-pattern matches.
Definition: TPRegexp.cxx:335
TMap * GetFilesPerServer(const char *exclude=0, Bool_t curronly=kFALSE)
Return a map of TFileCollections with the files on each data server, excluding servers in the comma-s...
virtual Long64_t GetGroupUsed(const char *group)
Returns the used space of that group.
float value
Definition: math.cpp:443
virtual TMap * GetDataSets(const char *uri, UInt_t=TDataSetManager::kExport)
Returns all datasets for the <group> and <user> specified by <uri>.
TUUID GetUUID() const
Definition: TDirectory.h:168
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:372
virtual void Close(Option_t *option="")
Close a file.
Definition: TFile.cxx:898
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:904