TDataSetManager This class contains functions to handle datasets in PROOF. It is the layer between TProofServ and the file system that stores the datasets.
TDataSetManager(const char* group = 0, const char* user = 0, const char* options = 0) | |
virtual | ~TDataSetManager() |
void | TObject::AbstractMethod(const char* method) const |
virtual void | TObject::AppendPad(Option_t* option = "") |
virtual void | TObject::Browse(TBrowser* b) |
static Bool_t | CheckDataSetSrvMaps(TUrl* furl, TString& fn, TList* srvmaplist = 0) |
static Bool_t | CheckStagedStatus(TFileInfo* fileInfo, Int_t fopt, Int_t maxfiles, Int_t newstagedfiles, TFileStager* stager, Bool_t createStager, Bool_t dbg, Bool_t& changed, Bool_t& touched, Bool_t& disappeared) |
static TClass* | Class() |
virtual const char* | TObject::ClassName() const |
virtual void | TObject::Clear(Option_t* = "") |
virtual Int_t | ClearCache(const char* uri) |
virtual TObject* | TObject::Clone(const char* newname = "") const |
virtual Int_t | TObject::Compare(const TObject* obj) const |
virtual void | TObject::Copy(TObject& object) const |
static TString | CreateUri(const char* dsGroup = 0, const char* dsUser = 0, const char* dsName = 0, const char* dsTree = 0) |
virtual void | TObject::Delete(Option_t* option = "")MENU |
virtual Int_t | TObject::DistancetoPrimitive(Int_t px, Int_t py) |
virtual void | TObject::Draw(Option_t* option = "") |
virtual void | TObject::DrawClass() constMENU |
virtual TObject* | TObject::DrawClone(Option_t* option = "") constMENU |
virtual void | TObject::Dump() constMENU |
virtual void | TObject::Error(const char* method, const char* msgfmt) const |
virtual void | TObject::Execute(const char* method, const char* params, Int_t* error = 0) |
virtual void | TObject::Execute(TMethod* method, TObjArray* params, Int_t* error = 0) |
virtual void | TObject::ExecuteEvent(Int_t event, Int_t px, Int_t py) |
virtual Bool_t | ExistsDataSet(const char* uri) |
virtual void | TObject::Fatal(const char* method, const char* msgfmt) const |
static Int_t | FillMetaData(TFileInfo* fi, TDirectory* d, const char* rdir = "/") |
virtual TObject* | TObject::FindObject(const char* name) const |
virtual TObject* | TObject::FindObject(const TObject* obj) const |
virtual Long64_t | GetAvgFileSize() const |
virtual TFileCollection* | GetDataSet(const char* uri, const char* server = 0) |
virtual TMap* | GetDataSets(const char* uri, UInt_t = TDataSetManager::kExport) |
static TList* | GetDataSetSrvMaps() |
virtual Option_t* | TObject::GetDrawOption() const |
static Long_t | TObject::GetDtorOnly() |
virtual Long64_t | GetGroupQuota(const char* group) |
virtual TMap* | GetGroupQuotaMap() |
virtual Long64_t | GetGroupUsed(const char* group) |
virtual const char* | TObject::GetIconName() const |
virtual const char* | TObject::GetName() const |
virtual char* | TObject::GetObjectInfo(Int_t px, Int_t py) const |
static Bool_t | TObject::GetObjectStat() |
virtual Option_t* | TObject::GetOption() const |
virtual TMap* | GetSubDataSets(const char* uri, const char* excludeservers) |
virtual const char* | TObject::GetTitle() const |
virtual UInt_t | TObject::GetUniqueID() const |
virtual Bool_t | TObject::HandleTimer(TTimer* timer) |
virtual ULong_t | TObject::Hash() const |
virtual void | TObject::Info(const char* method, const char* msgfmt) const |
virtual Bool_t | TObject::InheritsFrom(const char* classname) const |
virtual Bool_t | TObject::InheritsFrom(const TClass* cl) const |
virtual void | TObject::Inspect() constMENU |
void | TObject::InvertBit(UInt_t f) |
virtual TClass* | IsA() const |
virtual Bool_t | TObject::IsEqual(const TObject* obj) const |
virtual Bool_t | TObject::IsFolder() const |
Bool_t | TObject::IsOnHeap() const |
virtual Bool_t | TObject::IsSortable() const |
Bool_t | TObject::IsZombie() const |
virtual void | TObject::ls(Option_t* option = "") const |
void | TObject::MayNotUse(const char* method) const |
virtual void | MonitorUsedSpace(TVirtualMonitoringWriter* monitoring) |
virtual Bool_t | TObject::Notify() |
virtual Int_t | NotifyUpdate(const char* group = 0, const char* user = 0, const char* dspath = 0, Long_t mtime = 0, const char* checksum = 0) |
void | TObject::Obsolete(const char* method, const char* asOfVers, const char* removedFromVers) const |
static void | TObject::operator delete(void* ptr) |
static void | TObject::operator delete(void* ptr, void* vp) |
static void | TObject::operator delete[](void* ptr) |
static void | TObject::operator delete[](void* ptr, void* vp) |
void* | TObject::operator new(size_t sz) |
void* | TObject::operator new(size_t sz, void* vp) |
void* | TObject::operator new[](size_t sz) |
void* | TObject::operator new[](size_t sz, void* vp) |
virtual void | TObject::Paint(Option_t* option = "") |
static TList* | ParseDataSetSrvMaps(const TString& srvmaps) |
virtual void | ParseInitOpts(const char* opts) |
Bool_t | ParseUri(const char* uri, TString* dsGroup = 0, TString* dsUser = 0, TString* dsName = 0, TString* dsTree = 0, Bool_t onlyCurrent = kFALSE, Bool_t wildcards = kFALSE) |
virtual void | TObject::Pop() |
virtual void | TObject::Print(Option_t* option = "") const |
static void | ProcessFile(TFileInfo* fileInfo, Int_t sopt, Bool_t checkstg, Bool_t doall, TFileStager* stager, Bool_t createStager, const char* stageopts, Bool_t dbg, Bool_t& changed, Bool_t& opened) |
virtual Int_t | TObject::Read(const char* name) |
virtual void | TObject::RecursiveRemove(TObject* obj) |
virtual Int_t | RegisterDataSet(const char* uri, TFileCollection* dataSet, const char* opt) |
virtual Bool_t | RemoveDataSet(const char* uri) |
void | TObject::ResetBit(UInt_t f) |
virtual void | TObject::SaveAs(const char* filename = "", Option_t* option = "") constMENU |
virtual void | TObject::SavePrimitive(ostream& out, Option_t* option = "") |
Int_t | ScanDataSet(const char* uri, const char* opt) |
virtual Int_t | ScanDataSet(const char* uri, UInt_t option = kReopen|kDebug) |
static Int_t | ScanDataSet(TFileCollection* dataset, Int_t fopt, Int_t sopt = 0, Int_t ropt = 0, Bool_t dbg = kFALSE, Int_t* touched = 0, Int_t* opened = 0, Int_t* disappeared = 0, TList* flist = 0, Long64_t avgsz = -1, const char* mss = 0, Int_t maxfiles = -1, const char* stageopts = 0) |
static Int_t | ScanFile(TFileInfo* fileinfo, Bool_t notify) |
void | TObject::SetBit(UInt_t f) |
void | TObject::SetBit(UInt_t f, Bool_t set) |
virtual void | TObject::SetDrawOption(Option_t* option = "")MENU |
static void | TObject::SetDtorOnly(void* obj) |
static void | TObject::SetObjectStat(Bool_t stat) |
void | SetScanCounters(Int_t t = -1, Int_t o = -1, Int_t d = -1) |
virtual void | TObject::SetUniqueID(UInt_t uid) |
virtual Int_t | ShowCache(const char* uri) |
virtual void | ShowDataSets(const char* uri = "*", const char* opt = "") |
virtual void | ShowMembers(TMemberInspector&) |
virtual void | ShowQuota(const char* opt) |
virtual void | Streamer(TBuffer&) |
void | StreamerNVirtual(TBuffer& ClassDef_StreamerNVirtual_b) |
virtual void | TObject::SysError(const char* method, const char* msgfmt) const |
Bool_t | TObject::TestBit(UInt_t f) const |
Int_t | TObject::TestBits(UInt_t f) const |
virtual void | TObject::UseCurrentStyle() |
virtual void | TObject::Warning(const char* method, const char* msgfmt) const |
virtual Int_t | TObject::Write(const char* name = 0, Int_t option = 0, Int_t bufsize = 0) |
virtual Int_t | TObject::Write(const char* name = 0, Int_t option = 0, Int_t bufsize = 0) const |
virtual void | TObject::DoError(int level, const char* location, const char* fmt, va_list va) const |
virtual TMap* | GetGroupUsedMap() |
Int_t | GetNDisapparedFiles() const |
Int_t | GetNOpenedFiles() const |
Int_t | GetNTouchedFiles() const |
void | GetQuota(const char* group, const char* user, const char* dsName, TFileCollection* dataset) |
virtual TMap* | GetUserUsedMap() |
void | TObject::MakeZombie() |
void | PrintDataSet(TFileCollection* fc, Int_t popt = 0) |
void | PrintUsedSpace() |
Bool_t | ReadGroupConfig(const char* cf = 0) |
static Long64_t | ToBytes(const char* size = 0) |
virtual void | UpdateUsedSpace() |
TDataSetManager(const TDataSetManager&) | |
TDataSetManager& | operator=(const TDataSetManager&) |
enum EDataSetStatusBits { | kCheckQuota | |
kAllowRegister | ||
kAllowVerify | ||
kTrustInfo | ||
kIsSandbox | ||
kUseCache | ||
kDoNotUseCache | ||
}; | ||
enum EDataSetWorkOpts { | kDebug | |
kShowDefault | ||
kPrint | ||
kExport | ||
kQuotaUpdate | ||
kSetDefaultTree | ||
kForceScan | ||
kNoHeaderPrint | ||
kReopen | ||
kTouch | ||
kMaxFiles | ||
kReadShort | ||
kFileMustExist | ||
kNoAction | ||
kLocateOnly | ||
kStageOnly | ||
kNoCacheUpdate | ||
kRefreshLs | ||
kList | ||
kAllFiles | ||
kStagedFiles | ||
kNoStagedCheck | ||
}; | ||
enum TObject::EStatusBits { | kCanDelete | |
kMustCleanup | ||
kObjInCanvas | ||
kIsReferenced | ||
kHasUUID | ||
kCannotPick | ||
kNoContextMenu | ||
kInvalidObject | ||
}; | ||
enum TObject::[unnamed] { | kIsOnHeap | |
kNotDeleted | ||
kZombie | ||
kBitMask | ||
kSingleKey | ||
kOverwrite | ||
kWriteDelete | ||
}; |
Long64_t | fAvgFileSize | Average file size to be used to estimate the dataset size (in MB) |
TUri | fBase | Base URI used to parse dataset names |
TString | fCommonGroup | Group that stores the COMMON datasets |
TString | fCommonUser | User that stores the COMMON datasets |
TString | fGroup | Group to which the owner of this session belongs |
TString | fGroupConfigFile | Path to the group config file |
TMap | fGroupQuota | Group quotas (read from config file) |
TMap | fGroupUsed | <group> --> <used bytes> (TParameter) |
Long_t | fMTimeGroupConfig | Last modification of the group config file |
Int_t | fNDisappearedFiles | Number of files disappeared in the last ScanDataSet operation |
Int_t | fNOpenedFiles | Number of files opened in the last ScanDataSet operation |
Int_t | fNTouchedFiles | Number of files touched in the last ScanDataSet operation |
TString | fUser | Owner of the session |
TMap | fUserUsed | <group> --> <map of users> --> <value> |
static TString | fgCommonDataSetTag | Name for common datasets, default: COMMON |
static TList* | fgDataSetSrvMaps | List of TPair(TRegexp, TObjString) for mapping server coordinates |
Main constructor
Parse the opts string and set the init bits accordingly. Available options: Cq: set kCheckQuota Ar: set kAllowRegister Av: set kAllowVerify Ti: set kTrustInfo Sb: set kIsSandbox Ca: set kUseCache or kDoNotUseCache. The opts string may also contain additional unrelated info: in such a case the field delimited by the prefix "opt:" is analyzed, e.g. if opts is "/tmp/dataset opt:Cq:-Ar: root://lxb6046.cern.ch" only the substring "Cq:-Ar:" will be parsed.
Read group config file 'cf'. If cf == 0, re-read the file pointed to by fGroupConfigFile if it has changed. Expects the following directives: Group definition: group <groupname> <user>+; disk quota: property <groupname> diskquota <quota in GB>; average filesize (to be used when the file size is not available): averagefilesize <average size>{G,g,M,m,K,k}
Static utility function to get the number of bytes from a string representation in the form "<digit><sfx>" with <sfx> = {"", "k", "M", "G", "T", "P"} (case insensitive). Returns -1 if the format is wrong.
Utility function used in various methods for user dataset upload.
Returns all datasets for the <group> and <user> specified by <uri>. If <user> is 0, it returns all datasets for the given <group>. If <group> is 0, it returns all datasets. The returned TMap contains: <group> --> <map of users> --> <map of datasets> --> <dataset> (TFileCollection). The unsigned int 'option' is forwarded to GetDataSet and BrowseDataSet. Available options (to be OR-ed): kShowDefault a default selection is shown that includes the ones from the current user, the ones from the group and the common ones; kPrint print the dataset content; kQuotaUpdate update quotas; kExport use export naming. NB1: options "kPrint", "kQuotaUpdate" and "kExport" are mutually exclusive. NB2: for options "kPrint" and "kQuotaUpdate" return is null.
Scans the dataset indicated by 'uri' following the 'opts' directives
The 'opts' string contains up to 4 directive fields separated by ':'
'selection' field :
A, allfiles: process all files
D, staged: process only staged (on Disk) files (if 'allfiles:' is not specified
the default is to process only files marked as non-staged)
'pre-action field':
O, open: open the files marked as staged when processing only files
marked as non-staged
T, touch: open and touch the files marked as staged when processing
only files marked as non-staged
I, nostagedcheck: do not check the actual stage status on selected files
'process' field:
N, noaction: do nothing on the selected files
P, fullproc: open the selected files and extract the meta information
L, locateonly: only locate the selected files
S, stageonly: issue a stage request for the selected files not yet staged
'auxiliary' field
V, verbose: notify the actions
Returns 0 on success, -1 if any failure occurs.
Scans the dataset indicated by <uri> and returns the number of missing files. Returns -1 if any failure occurs. For more details, see documentation of ScanDataSet(TFileCollection *dataset, const char *option)
Gets quota information from this dataset
Register a dataset, performing quota checks if needed. Returns 0 on success, -1 on failure.
Save into the <datasetdir>/dataset.list file the name of the last updated or created or modified dataset Returns 0 on success, -1 on error
Creates URI for the dataset manager in the form '[[/dsGroup/]dsUser/]dsName[#dsObjPath]'. The optional dsObjPath can be in the form '[subdir/]objname'.
Parses a (relative) URI that describes a DataSet on the cluster. The input 'uri' should be in the form '[[/group/]user/]dsname[#[subdir/]objname]', where 'objname' is the name of the object (e.g. the tree name) and the 'subdir' is the directory in the file where it should be looked for. After resolving against a base URI consisting of proof://masterhost/group/user - meaning masterhost, group and user of the current session - the path is checked to contain exactly three elements separated by '/': group/user/dsname. If wildcards, '*' is allowed in group and user and dsname is allowed to be empty. If onlyCurrent, only group and user of current session are allowed. Only non-null parameters are filled by this function. Returns kTRUE in case of success.
Partition dataset 'ds' according to the servers. The returned TMap contains: <server> --> <subdataset> (TFileCollection) where <subdataset> is the subset of 'ds' on <server>. The partitioning is done using all the URLs in the TFileInfo's, so the resulting datasets are not mutually exclusive. The string 'exclude' contains a comma-separated list of servers to exclude from the map.
Formatted printout of the content of TFileCollection 'fc'. Options in the form popt = u * 10 + f f 0 => header only, 1 => header + files when printing files u 0 => print file name only, 1 => print full URL
Prints formatted information about the dataset 'uri'. The type and format of output is driven by 'opt': 1. opt = "server:srv1[,srv2[,srv3[,...]]]" Print info about the subsets of 'uri' on servers srv1, srv2, ... 2. opt = "servers[:exclude:srv1[,srv2[,srv3[,...]]]]" Print info about the subsets of 'uri' on all servers, except the ones in the exclude list srv1, srv2, ... 3. opt = <any> Print info about all datasets matching 'uri'. If 'opt' contains 'full:' the list of files in the datasets is also printed. In case 3 this is enabled only if 'uri' matches a single dataset. In case 3, 'opt' may contain: 'full:' the list of files in the datasets is also printed; 'forcescan:' the datasets are opened to get the information, otherwise the pre-processed information is used; 'noheader:' the labelling header is not printed, useful when chaining several printouts; 'noupdate:' do not update the cache (which may be slow on very remote servers); 'refresh:' refresh the information (requires appropriate credentials; typically it can be done only for owned datasets).
Go through the files in the specified dataset, selecting files according to 'fopt' and doing on these files the actions described by 'sopt'. If required, the information in 'dataset' is updated. The int fopt controls which files have to be processed (or added to the list if ropt is 1 - see below); 'fopt' is defined in terms of csopt and fsopt: fopt = sign(fsopt) * csopt * 100 + fsopt where 'fsopt' controls the actual selection -1 all files in the dataset 0 process only files marked as 'non-staged' >=1 as 0 but files that are marked 'staged' are opened >=2 as 1 but files that are marked 'staged' are touched 10 process only files marked as 'staged'; files marked as 'non-staged' are ignored and 'csopt' controls if an actual check on the staged status (via TFileStager) is done 0 check that the file is staged using TFileStager 1 do not hard check the staged status (example: use fopt = -101 to check the staged status of all the files, or fopt = 110 to re-check the stage status of all the files marked as staged) If 'dbg' is true, some information about the ongoing operations is regularly printed; this can be useful when processing very large datasets, an operation which can take a very long time. The int 'sopt' controls what is done on the selected files (this is effective only if ropt is 0 or 2 - see below): -1 no action (fopt = 2 and sopt = -1 touches all staged files) 0 do the full process: open the files and fill the meta-information in the TFileInfo object, including the end-point URL 1 only locate the files, by updating the end-point URL (uses TFileStager::Locate which is much faster than a TFile::Open) 2 issue a stage request on the files The int 'ropt' controls which actions are performed: 0 do the full process: get list of files to process and process them 1 get the list of files to be scanned and return it in flist 2 process the files in flist (according to sopt) When defined, flist is under the responsibility of the caller.
If avgsz > 0 it is used for the final update of the dataset global counters. If 'mss' is defined use it to initialize the stager (instead of the Url in the TFileInfo objects). If maxfiles > 0, select for processing a maximum of 'maxfiles' files (but if fopt is 1 or 2 all files marked as 'staged' are still opened or touched). Return code: 1 dataset was not changed, 2 dataset was changed. The number of touched, opened and disappeared files are returned in the respective variables, if these are defined.
Check stage status of the file described by "fileInfo". fopt is same as "fopt" in TDataSetManager::ScanDataSet, which is repeated below: The int fopt controls which files have to be processed (or added to the list if ropt is 1 - see below); 'fopt' is defined in terms of csopt and fsopt: fopt = sign(fsopt) * csopt * 100 + fsopt where 'fsopt' controls the actual selection -1 all files in the dataset 0 process only files marked as 'non-staged' >=1 as 0 but files that are marked 'staged' are opened >=2 as 1 but files that are marked 'staged' are touched 10 process only files marked as 'staged'; files marked as 'non-staged' are ignored and 'csopt' controls if an actual check on the staged status (via TFileStager) is done 0 check that the file is staged using TFileStager 1 do not hard check the staged status (example: use fopt = -101 to check the staged status of all the files, or fopt = 110 to re-check the stage status of all the files marked as staged) If 'dbg' is true, some information about the ongoing operations is regularly printed; this can be useful when processing very large datasets, an operation which can take a very long time. If maxfiles > 0, select for processing a maximum of 'maxfiles' files (but if fopt is 1 or 2 all files marked as 'staged' are still opened or touched). Return code: kTRUE the file appears newly staged, kFALSE otherwise. 'changed' is true if the fileinfo is modified; 'touched' is true if the file is open and read; 'disappeared' is true if the file is marked staged but actually not staged.
Locate, stage, or fully validate file "fileInfo".
Open the file described by 'fileinfo' to extract the relevant meta-information. Return 0 if OK, -2 if the file cannot be opened, -1 if it is corrupted.
Navigate the directory 'd' (and its subdirectories) looking for TTree objects. Fill in the relevant metadata information in 'fi'. The name of the TFileInfoMeta metadata entry will be "/dir1/dir2/.../tree_name". Return 0 on success, -1 if any problem happens (object found in keys cannot be read, for example)
Create a server mapping list from the content of 'srvmaps'. Return the list (owned by the caller) or 0 if no valid info could be found.