Logo ROOT   6.08/07
Reference Guide
TProofSuperMaster.cxx
Go to the documentation of this file.
1 // @(#)root/proof:$Id$
2 // Author: Fons Rademakers 13/02/97
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class TProofSuperMaster
13 \ingroup proofkernel
14 
15 Implementation of TProof controlling PROOF federated clusters.
16 
17 */
18 
19 #include "TProofSuperMaster.h"
20 #include "TString.h"
21 #include "TObjString.h"
22 #include "TError.h"
23 #include "TList.h"
24 #include "TSortedList.h"
25 #include "TSlave.h"
26 #include "TMap.h"
27 #include "TProofServ.h"
28 #include "TSocket.h"
29 #include "TMonitor.h"
30 #include "TDSet.h"
31 #include "TPluginManager.h"
32 #include "TVirtualProofPlayer.h"
33 #include "TMessage.h"
34 #include "TUrl.h"
35 #include "TProofResourcesStatic.h"
36 #include "TProofNodeInfo.h"
37 #include "TROOT.h"
38 
40 
41 ////////////////////////////////////////////////////////////////////////////////
42 /// Start super master PROOF session.
43 
44 TProofSuperMaster::TProofSuperMaster(const char *masterurl, const char *conffile,
45  const char *confdir, Int_t loglevel,
46  const char *alias, TProofMgr *mgr)
47 {
48  // Default initializations
49  InitMembers();
50 
51  // This may be needed during init
52  fManager = mgr;
53 
54  fUrl = TUrl(masterurl);
55 
56  if (!conffile || !conffile[0])
57  conffile = kPROOF_ConfFile;
58  else if (!strncasecmp(conffile, "sm:", 3))
59  conffile+=3;
60  if (!confdir || !confdir[0])
61  confdir = kPROOF_ConfDir;
62 
63  // Instance type
64  fMasterServ = kTRUE;
65  ResetBit(TProof::kIsClient);
66  SetBit(TProof::kIsMaster);
67  SetBit(TProof::kIsTopMaster);
68 
69  Init(masterurl, conffile, confdir, loglevel, alias);
70 
71  // For Final cleanup
72  gROOT->GetListOfProofs()->Add(this);
73 }
74 
75 ////////////////////////////////////////////////////////////////////////////////
76 /// Start up PROOF submasters.
77 
79 {
80  // If this is a supermaster server, find the config file and start
81  // submaster servers as specified in the config file.
82  // There is a difference in startup between a slave and a submaster
83  // in which the submaster will issue a kPROOF_LOGFILE and
84  // then a kPROOF_LOGDONE message (which must be collected)
85  // while slaves do not.
86 
87  Int_t pc = 0;
88  TList *submasterList = new TList;
89  // Get list of workers
90  if (gProofServ->GetWorkers(submasterList, pc) == TProofServ::kQueryStop) {
91  Error("StartSlaves", "getting list of submaster nodes");
92  return kFALSE;
93  }
95  if (fImage.IsNull())
96  fImage = Form("%s:%s", TUrl(gSystem->HostName()).GetHostFQDN(),
98 
99  UInt_t nSubmasters = submasterList->GetSize();
100  UInt_t nSubmastersDone = 0;
101  Int_t ord = 0;
102  TList validSubmasters;
103  TList validPairs;
104  validPairs.SetOwner();
105 
106  // Loop over all submasters and start them
107  TListIter next(submasterList);
108  TObject *to;
109  TProofNodeInfo *submaster;
110  while ((to = next())) {
111  // Get the next submaster from the list
112  submaster = (TProofNodeInfo *)to;
113  const Char_t *conffile = submaster->GetConfig();
114  const Char_t *image = submaster->GetImage();
115  const Char_t *msd = submaster->GetMsd();
116  Int_t sport = submaster->GetPort();
117  if (sport == -1)
118  sport = fUrl.GetPort();
119 
120  TString fullord = TString(gProofServ->GetOrdinal()) + "." + ((Long_t) ord);
121 
122  // create submaster server
123  TUrl u(Form("%s:%d", submaster->GetNodeName().Data(), sport));
124  // Add group info in the password field, if any
125  if (strlen(gProofServ->GetGroup()) > 0) {
126  // Set also the user, otherwise the password is not exported
127  if (strlen(u.GetUser()) <= 0)
128  u.SetUser(gProofServ->GetUser());
129  u.SetPasswd(gProofServ->GetGroup());
130  }
131  TSlave *slave =
132  CreateSubmaster(u.GetUrl(), fullord, image, msd);
133 
134  // Add to global list (we will add to the monitor list after
135  // finalizing the server startup)
136  Bool_t submasterOk = kTRUE;
137  fSlaves->Add(slave);
138  if (slave->IsValid()) {
139  validPairs.Add(new TPair(slave, new TObjString(conffile)));
140  } else {
141  submasterOk = kFALSE;
142  fBadSlaves->Add(slave);
143  }
144 
145  PDB(kGlobal,3)
146  Info("StartSlaves","submaster on host %s created and"
147  " added to list", submaster->GetNodeName().Data());
148 
149  // Notify opening of connection
150  nSubmastersDone++;
152  m << TString("Opening connections to submasters") << nSubmasters
153  << nSubmastersDone << submasterOk;
154  gProofServ->GetSocket()->Send(m);
155 
156  ord++;
157 
158  } // end loop over all submasters
159 
160  // Cleanup
161  SafeDelete(submasterList);
162 
163  nSubmastersDone = 0;
164 
165  // Here we finalize the server startup: in this way the bulk
166  // of remote operations are almost parallelized
167  TIter nxsc(&validPairs);
168  TPair *sc = 0;
169  while ((sc = (TPair *) nxsc())) {
170  // Finalize setup of the server
171  TSlave *sl = (TSlave *) sc->Key();
172  TObjString *cf = (TObjString *) sc->Value();
173  sl->SetupServ(TSlave::kMaster, cf->GetName());
174 
175  // Monitor good slaves
176  Bool_t submasterOk = kTRUE;
177  if (sl->IsValid()) {
178  // check protocol compatibility
179  // protocol 1 is not supported anymore
180  if (fProtocol == 1) {
181  Error("StartSlaves", "master and submaster protocols"
182  " not compatible (%d and %d)",
184  submasterOk = kFALSE;
185  fBadSlaves->Add(sl);
186  } else {
187  fAllMonitor->Add(sl->GetSocket());
188  validSubmasters.Add(sl);
189  }
190  } else {
191  submasterOk = kFALSE;
192  fBadSlaves->Add(sl);
193  }
194 
195  // Notify end of startup operations
196  nSubmastersDone++;
198  m << TString("Setting up submasters") << nSubmasters
199  << nSubmastersDone << submasterOk;
200  gProofServ->GetSocket()->Send(m);
201  }
202 
203  Collect(kAll); //Get kPROOF_LOGFILE and kPROOF_LOGDONE messages
204  TIter nextSubmaster(&validSubmasters);
205  while (TSlave* sl = dynamic_cast<TSlave*>(nextSubmaster())) {
206  if (sl->GetStatus() == -99) {
207  Error("StartSlaves", "not allowed to connect to PROOF master server");
208  fBadSlaves->Add(sl);
209  continue;
210  }
211 
212  if (!sl->IsValid()) {
213  Error("StartSlaves", "failed to setup connection with PROOF master server");
214  fBadSlaves->Add(sl);
215  continue;
216  }
217  }
218 
219  return kTRUE;
220 }
221 
222 ////////////////////////////////////////////////////////////////////////////////
223 /// Process a data set (TDSet) using the specified selector (.C) file.
224 /// Entry- or event-lists should be set in the data set object using
225 /// TDSet::SetEntryList.
226 /// The return value is -1 in case of error and TSelector::GetStatus() in
227 /// in case of success.
228 
229 Long64_t TProofSuperMaster::Process(TDSet *set, const char *selector, Option_t *option,
231 {
232  if (!IsValid()) return -1;
233 
234  R__ASSERT(GetPlayer());
235 
236  if (GetProgressDialog())
237  GetProgressDialog()->ExecPlugin(5, this, selector, set->GetListOfElements()->GetSize(),
238  first, nentries);
239 
240  return GetPlayer()->Process(set, selector, option, nentries, first);
241 }
242 
243 ////////////////////////////////////////////////////////////////////////////////
244 /// Validate a TDSet.
245 
247 {
248  if (dset->ElementsValid()) return;
249 
250  // We need to recheck after this
253 
254  TList msds;
255  msds.SetOwner();
256 
257  TList smholder;
258  smholder.SetOwner();
259  TList elemholder;
260  elemholder.SetOwner();
261 
262  // build nodelist with slaves and elements
263  TIter nextSubmaster(GetListOfActiveSlaves());
264  while (TSlave *sl = dynamic_cast<TSlave*>(nextSubmaster())) {
265  TList *smlist = 0;
266  TPair *p = dynamic_cast<TPair*>(msds.FindObject(sl->GetMsd()));
267  if (!p) {
268  smlist = new TList;
269  smlist->SetName(sl->GetMsd());
270 
271  smholder.Add(smlist);
272  TList *elemlist = new TSortedList(kSortDescending);
273  elemlist->SetName(TString(sl->GetMsd())+"_elem");
274  elemholder.Add(elemlist);
275  msds.Add(new TPair(smlist, elemlist));
276  } else {
277  smlist = dynamic_cast<TList*>(p->Key());
278  }
279  if (smlist) smlist->Add(sl);
280  }
281 
282  TIter nextElem(dset->GetListOfElements());
283  while (TDSetElement *elem = dynamic_cast<TDSetElement*>(nextElem())) {
284  if (elem->GetValid()) continue;
285  TPair *p = dynamic_cast<TPair*>(msds.FindObject(elem->GetMsd()));
286  if (p && p->Value()) {
287  TList *xl = dynamic_cast<TList*>(p->Value());
288  if (xl) xl->Add(elem);
289  } else {
290  Error("ValidateDSet", "no mass storage domain '%s' associated"
291  " with available submasters",
292  elem->GetMsd());
293  return;
294  }
295  }
296 
297  // send to slaves
298  TList usedsms;
299  TIter nextSM(&msds);
300  SetDSet(dset); // set dset to be validated in Collect()
301  while (TPair *msd = dynamic_cast<TPair*>(nextSM())) {
302  TList *sms = dynamic_cast<TList*>(msd->Key());
303  TList *setelements = dynamic_cast<TList*>(msd->Value());
304 
305  // distribute elements over the slaves
306  Int_t nsms = sms ? sms->GetSize() : -1;
307  Int_t nelements = setelements ? setelements->GetSize() : -1;
308  for (Int_t i=0; i<nsms; i++) {
309 
310  TDSet set(dset->GetType(), dset->GetObjName(),
311  dset->GetDirectory());
312  for (Int_t j = (i*nelements)/nsms;
313  j < ((i+1)*nelements)/nsms;
314  j++) {
315  TDSetElement *elem = setelements ?
316  dynamic_cast<TDSetElement*>(setelements->At(j)) : (TDSetElement *)0;
317  if (elem) {
318  set.Add(elem->GetFileName(), elem->GetObjName(),
319  elem->GetDirectory(), elem->GetFirst(),
320  elem->GetNum(), elem->GetMsd());
321  }
322  }
323 
324  if (set.GetListOfElements()->GetSize()>0) {
326  mesg << &set;
327 
328  TSlave *sl = dynamic_cast<TSlave*>(sms->At(i));
329  if (sl) {
330  PDB(kGlobal,1)
331  Info("ValidateDSet",
332  "Sending TDSet with %d elements to worker %s"
333  " to be validated", set.GetListOfElements()->GetSize(),
334  sl->GetOrdinal());
335  sl->GetSocket()->Send(mesg);
336  usedsms.Add(sl);
337  } else {
338  Warning("ValidateDSet", "not a TSlave object");
339  }
340  }
341  }
342  }
343 
344  PDB(kGlobal,1)
345  Info("ValidateDSet","Calling Collect");
346  Collect(&usedsms);
347  SetDSet(0);
348 }
349 
350 ////////////////////////////////////////////////////////////////////////////////
351 /// Construct a TProofPlayer object. The player string specifies which
352 /// player should be created: remote, slave, sm (supermaster) or base.
353 /// Default is sm. Socket is needed in case a slave player is created.
354 
356 {
357  if (!player)
358  player = "sm";
359 
360  SetPlayer(TVirtualProofPlayer::Create(player, this, s));
361  return GetPlayer();
362 }
363 
Bool_t StartSlaves(Bool_t)
Start up PROOF submasters.
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:899
void ValidateDSet(TDSet *dset)
Validate a TDSet.
long long Long64_t
Definition: RtypesCore.h:69
virtual EQueryAction GetWorkers(TList *workers, Int_t &prioritychange, Bool_t resume=kFALSE)
Get list of workers to be used from now on.
Collectable string class.
Definition: TObjString.h:32
const char Option_t
Definition: RtypesCore.h:62
virtual Int_t Send(const TMessage &mess)
Send a TMessage object.
Definition: TSocket.cxx:520
This class represents a WWW compatible URL.
Definition: TUrl.h:41
void SetPlayer(TVirtualProofPlayer *player)
Set a new PROOF player.
Definition: TProof.cxx:10171
This class implements a data set to be used for PROOF processing.
Definition: TDSet.h:153
const char * GetGroup() const
Definition: TProofServ.h:256
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
The PROOF manager interacts with the PROOF server coordinator to create or destroy a PROOF session...
Definition: TProofMgr.h:53
TSocket * GetSocket() const
Definition: TProofServ.h:271
TString fImage
Definition: TProof.h:600
#define R__ASSERT(e)
Definition: TError.h:98
#define gROOT
Definition: TROOT.h:364
virtual void Add(TSocket *sock, Int_t interest=kRead)
Add socket to the monitor's active list.
Definition: TMonitor.cxx:168
Long64_t GetFirst() const
Definition: TDSet.h:114
const char * GetOrdinal() const
Definition: TSlave.h:135
Basic string class.
Definition: TString.h:137
void SetDSet(TDSet *dset)
Definition: TProof.h:767
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
const TString & GetImage() const
Implementation of TProof controlling PROOF federated clusters.
TList * GetListOfElements() const
Definition: TDSet.h:231
Iterator of linked list.
Definition: TList.h:187
virtual TObject * FindObject(const char *name) const
Find an object in this list using its name.
Definition: TList.cxx:497
virtual Long64_t Process(TDSet *set, const char *selector, Option_t *option="", Long64_t nentries=-1, Long64_t firstentry=0)=0
Manages an element of a TDSet.
Definition: TDSet.h:68
#define SafeDelete(p)
Definition: RConfig.h:507
TList * fBadSlaves
Definition: TProof.h:604
virtual Bool_t IsValid() const
Definition: TSlave.h:154
Long64_t GetNum() const
Definition: TDSet.h:116
#define PDB(mask, level)
Definition: TProofDebug.h:58
TSocket * GetSocket() const
Definition: TSlave.h:138
void Init(TClassEdit::TInterpreterLookupHelper *helper)
Definition: TClassEdit.cxx:119
Int_t Collect(const TSlave *sl, Long_t timeout=-1, Int_t endtype=-1, Bool_t deactonfail=kFALSE)
Collect responses from slave sl.
Definition: TProof.cxx:2647
const char * GetDirectory() const
Return directory where to look for object.
Definition: TDSet.cxx:234
A sorted doubly linked list.
Definition: TSortedList.h:30
TSlave * CreateSubmaster(const char *url, const char *ord, const char *image, const char *msd, Int_t nwk=1)
Create a new TSlave of type TSlave::kMaster.
Definition: TProof.cxx:1853
TList * fSlaves
Definition: TProof.h:602
const Bool_t kSortDescending
Definition: TList.h:41
TObject * Value() const
Definition: TMap.h:125
static TVirtualProofPlayer * Create(const char *player, TProof *p, TSocket *s=0)
Create a PROOF player.
A doubly linked list.
Definition: TList.h:47
const char * GetObjName() const
Definition: TDSet.h:122
const char *const kPROOF_ConfFile
Definition: TProof.h:152
const char * GetName() const
Returns name of object.
Definition: TObjString.h:42
Bool_t ElementsValid()
Check if all elements are valid.
Definition: TDSet.cxx:1537
TMonitor * fAllMonitor
Definition: TProof.h:605
The purpose of this class is to provide a complete node description for masters, submasters and worke...
const TString & GetNodeName() const
R__EXTERN TSystem * gSystem
Definition: TSystem.h:549
TObject * Key() const
Definition: TMap.h:124
Long_t ExecPlugin(int nargs, const T &... params)
const char *const kPROOF_ConfDir
Definition: TProof.h:153
Long64_t Process(TDSet *set, const char *selector, Option_t *option="", Long64_t nentries=-1, Long64_t firstentry=0)
Process a data set (TDSet) using the specified selector (.C) file.
unsigned int UInt_t
Definition: RtypesCore.h:42
TMarker * m
Definition: textangle.C:8
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:925
char * Form(const char *fmt,...)
const TString & GetConfig() const
const Int_t kPROOF_Protocol
Definition: TProof.h:150
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:311
void SetName(const char *name)
Definition: TCollection.h:116
Int_t fProtocol
Definition: TProof.h:601
const TString & GetMsd() const
long Long_t
Definition: RtypesCore.h:50
Class used by TMap to store (key,value) pairs.
Definition: TMap.h:106
virtual TVirtualProofPlayer * MakePlayer(const char *player=0, TSocket *s=0)
Construct a TProofPlayer object.
#define ClassImp(name)
Definition: Rtypes.h:279
TVirtualProofPlayer * GetPlayer() const
Definition: TProof.h:746
virtual const char * HostName()
Return the system's host name.
Definition: TSystem.cxx:308
TPluginHandler * GetProgressDialog() const
Definition: TProof.h:772
int nentries
Definition: THbookFile.cxx:89
const char * GetFileName() const
Definition: TDSet.h:113
const char * GetType() const
Definition: TDSet.h:228
Int_t GetPort() const
Definition: TUrl.h:87
Bool_t IsNull() const
Definition: TString.h:387
Mother of all ROOT objects.
Definition: TObject.h:37
char Char_t
Definition: RtypesCore.h:29
const char * GetImage() const
Definition: TProofServ.h:258
R__EXTERN TProofServ * gProofServ
Definition: TProofServ.h:361
virtual void Add(TObject *obj)
Definition: TList.h:81
const char * GetWorkDir() const
Definition: TProofServ.h:257
const char * GetMsd() const
Definition: TDSet.h:119
void ResetBit(UInt_t f)
Definition: TObject.h:156
Bool_t IsValid() const
Definition: TProof.h:967
Abstract interface for the PROOF player.
Definition: first.py:1
virtual Int_t GetSize() const
Definition: TCollection.h:95
Class describing a PROOF worker server.
Definition: TSlave.h:50
const char * GetUser() const
Definition: TProofServ.h:255
const char * GetObjName() const
Definition: TDSet.h:229
const Bool_t kTRUE
Definition: Rtypes.h:91
TList * GetListOfActiveSlaves() const
Definition: TProof.h:753
TUrl fUrl
Definition: TProof.h:597
const char * GetOrdinal() const
Definition: TProofServ.h:267
virtual Int_t SetupServ(Int_t stype, const char *conffile)
Init a PROOF slave object.
Definition: TSlave.cxx:179
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:911
const char * GetDirectory() const
Definition: TDSet.h:230
const char * Data() const
Definition: TString.h:349
Int_t GetPort() const