Logo ROOT   6.12/07
Reference Guide
TWebFile.cxx
Go to the documentation of this file.
1 // @(#)root/net:$Id$
2 // Author: Fons Rademakers 17/01/97
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////////////
13 // //
14 // TWebFile //
15 // //
16 // A TWebFile is like a normal TFile except that it reads its data //
17 // via a standard apache web server. A TWebFile is a read-only file. //
18 // //
19 //////////////////////////////////////////////////////////////////////////
20 
21 #include "TWebFile.h"
22 #include "TROOT.h"
23 #include "TSocket.h"
24 #include "Bytes.h"
25 #include "TError.h"
26 #include "TSystem.h"
27 #include "TBase64.h"
28 #include "TVirtualPerfStats.h"
29 #ifdef R__SSL
30 #include "TSSLSocket.h"
31 #endif
32 
33 #include <errno.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #ifdef WIN32
38 # ifndef EADDRINUSE
39 # define EADDRINUSE 10048
40 # endif
41 # ifndef EISCONN
42 # define EISCONN 10056
43 # endif
44 #endif
45 
// HTTP "User-Agent" header line identifying this client.
// NOTE(review): no use of this constant is visible in this extract;
// presumably it is appended to the outgoing request headers - confirm.
46 static const char *gUserAgent = "User-Agent: ROOT-TWebFile/1.1";
47 
49 
51 
52 
53 // Internal class used to manage the socket that may stay open between
54 // calls when HTTP/1.1 protocol is used
55 class TWebSocket {
56 private:
57  TWebFile *fWebFile; // associated web file
58 public:
59  TWebSocket(TWebFile *f);
60  ~TWebSocket();
61  void ReOpen();
62 };
63 
64 ////////////////////////////////////////////////////////////////////////////////
65 /// Open web file socket.
66 
67 TWebSocket::TWebSocket(TWebFile *f)
68 {
69  fWebFile = f;
70  if (!f->fSocket)
71  ReOpen();
72 }
73 
74 ////////////////////////////////////////////////////////////////////////////////
75 /// Close socket in case not HTTP/1.1 protocol or when explicitly requested.
76 
77 TWebSocket::~TWebSocket()
78 {
79  if (!fWebFile->fHTTP11) {
80  delete fWebFile->fSocket;
81  fWebFile->fSocket = 0;
82  }
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 /// Re-open web file socket.
87 
88 void TWebSocket::ReOpen()
89 {
90  if (fWebFile->fSocket) {
91  delete fWebFile->fSocket;
92  fWebFile->fSocket = 0;
93  }
94 
95  TUrl connurl;
96  if (fWebFile->fProxy.IsValid())
97  connurl = fWebFile->fProxy;
98  else
99  connurl = fWebFile->fUrl;
100 
101  for (Int_t i = 0; i < 5; i++) {
102  if (strcmp(connurl.GetProtocol(), "https") == 0) {
103 #ifdef R__SSL
104  fWebFile->fSocket = new TSSLSocket(connurl.GetHost(), connurl.GetPort());
105 #else
106  ::Error("TWebSocket::ReOpen", "library compiled without SSL, https not supported");
107  return;
108 #endif
109  } else
110  fWebFile->fSocket = new TSocket(connurl.GetHost(), connurl.GetPort());
111 
112  if (!fWebFile->fSocket || !fWebFile->fSocket->IsValid()) {
113  delete fWebFile->fSocket;
114  fWebFile->fSocket = 0;
115  if (gSystem->GetErrno() == EADDRINUSE || gSystem->GetErrno() == EISCONN) {
116  gSystem->Sleep(i*10);
117  } else {
118  ::Error("TWebSocket::ReOpen", "cannot connect to host %s (errno=%d)",
119  fWebFile->fUrl.GetHost(), gSystem->GetErrno());
120  return;
121  }
122  } else
123  return;
124  }
125 }
126 
127 
129 
130 ////////////////////////////////////////////////////////////////////////////////
131 /// Create a Web file object. A web file is the same as a read-only
132 /// TFile except that it is being read via a HTTP server. The url
133 /// argument must be of the form: http://host.dom.ain/file.root.
134 /// The opt can be "NOPROXY", to bypass any set "http_proxy" shell
135 /// variable. The proxy can be specified as (in sh, or equivalent csh):
136 /// export http_proxy=http://pcsalo.cern.ch:3128
137 /// The proxy can also be specified via the static method TWebFile::SetProxy().
138 /// Basic authentication (AuthType Basic) is supported. The user name and
139 /// passwd can be specified in the url like this:
140 /// http://username:mypasswd@pcsalo.cern.ch/files/aap.root
141 /// If the file specified in the URL does not exist or is not accessible
142 /// the kZombie bit will be set in the TWebFile object. Use IsZombie()
143 /// to see if the file is accessible. The preferred interface to this
144 /// constructor is via TFile::Open().
145 
146 TWebFile::TWebFile(const char *url, Option_t *opt) : TFile(url, "WEB"), fSocket(0)
147 {
148  TString option = opt;
149  fNoProxy = kFALSE;
150  if (option.Contains("NOPROXY", TString::kIgnoreCase))
151  fNoProxy = kTRUE;
152  CheckProxy();
153 
154  Bool_t headOnly = kFALSE;
155  if (option.Contains("HEADONLY", TString::kIgnoreCase))
156  headOnly = kTRUE;
157 
158  if (option == "IO")
159  return;
160 
161  Init(headOnly);
162 }
163 
164 ////////////////////////////////////////////////////////////////////////////////
165 /// Create a Web file object. A web file is the same as a read-only
166 /// TFile except that it is being read via a HTTP server. Make sure url
167 /// is a valid TUrl object.
168 /// The opt can be "NOPROXY", to bypass any set "http_proxy" shell
169 /// variable. The proxy can be specified as (in sh, or equivalent csh):
170 /// export http_proxy=http://pcsalo.cern.ch:3128
171 /// The proxy can also be specified via the static method TWebFile::SetProxy().
172 /// Basic authentication (AuthType Basic) is supported. The user name and
173 /// passwd can be specified in the url like this:
174 /// http://username:mypasswd@pcsalo.cern.ch/files/aap.root
175 /// If the file specified in the URL does not exist or is not accessible
176 /// the kZombie bit will be set in the TWebFile object. Use IsZombie()
177 /// to see if the file is accessible.
178 
179 TWebFile::TWebFile(TUrl url, Option_t *opt) : TFile(url.GetUrl(), "WEB"), fSocket(0)
180 {
181  TString option = opt;
182  fNoProxy = kFALSE;
183  if (option.Contains("NOPROXY", TString::kIgnoreCase))
184  fNoProxy = kTRUE;
185  CheckProxy();
186 
187  Bool_t headOnly = kFALSE;
188  if (option.Contains("HEADONLY", TString::kIgnoreCase))
189  headOnly = kTRUE;
190 
191  Init(headOnly);
192 }
193 
194 ////////////////////////////////////////////////////////////////////////////////
195 /// Cleanup.
196 
// NOTE(review): the function header (listing line 197) is absent from this
// extract; judging by the "Cleanup." comment above this is presumably the
// body of the TWebFile destructor - confirm.
198 {
199  delete fSocket; // close the connection that may still be held
200  if (fFullCache) {
 // fFullCache is allocated with malloc() in GetFromWeb10(), hence free().
201  free(fFullCache);
202  fFullCache = 0;
203  fFullCacheSize = 0;
204  }
205 }
206 
207 ////////////////////////////////////////////////////////////////////////////////
208 /// Initialize a TWebFile object.
209 
// Reset the connection state, probe the remote file with GetHead() and,
// unless only the header was requested, check that a ROOT file starts with
// the expected magic bytes.
//   readHeadOnly - when true, return right after the HEAD request with
//                  fD = -1; a negative GetHead() result is stored in fWritten.
// On failure the object is made a zombie and gDirectory is set to gROOT.
// NOTE(review): listing lines 217, 221 and 258 are absent from this extract,
// so statements may be missing from the body below.
210 void TWebFile::Init(Bool_t readHeadOnly)
211 {
212  char buf[4];
213  int err;
214 
215  fSocket = 0;
216  fSize = -1;
218  fHTTP11 = kFALSE;
219  fFullCache = 0;
220  fFullCacheSize = 0;
222 
223  if ((err = GetHead()) < 0) {
224  if (readHeadOnly) {
225  fD = -1;
226  fWritten = err; // hand the GetHead() error code to the caller via fWritten
227  return;
228  }
229  if (err == -2) {
230  Error("TWebFile", "%s does not exist", fBasicUrl.Data());
231  MakeZombie();
232  gDirectory = gROOT;
233  return;
234  }
235  // err == -3 HEAD not supported, fall through and try ReadBuffer()
 // NOTE(review): any other negative code (e.g. -1) also falls through here.
236  }
237  if (readHeadOnly) {
238  fD = -1;
239  return;
240  }
241 
242  if (fIsRootFile) {
 // Read the first four bytes; ReadBuffer() returns kTRUE on error.
243  Seek(0);
244  if (ReadBuffer(buf, 4)) {
245  MakeZombie();
246  gDirectory = gROOT;
247  return;
248  }
249 
250  if (strncmp(buf, "root", 4) && strncmp(buf, "PK", 2)) { // PK is zip file
251  Error("TWebFile", "%s is not a ROOT file", fBasicUrl.Data());
252  MakeZombie();
253  gDirectory = gROOT;
254  return;
255  }
256  }
257 
259  fD = -2; // so TFile::IsOpen() will return true when in TFile::~TFile
260 }
261 
262 ////////////////////////////////////////////////////////////////////////////////
263 /// Set GET command for use by ReadBuffer(s)10(), handle redirection if
264 /// needed. Give full URL so Apache's virtual hosts solution works.
265 
// Build or patch fMsgReadBuffer10, the request prefix that ends in
// "Range: bytes=" and to which the read routines append byte ranges.
//   redirectLocation - when non-null, fUrl and fBasicUrl are switched to
//                      this location before the command is built or patched.
//   tempRedirect     - when true the current fUrl is saved in fUrlOrg;
//                      otherwise fUrlOrg and fBasicUrlOrg are cleared.
// NOTE(review): listing lines 274, 284, 307, 308, 310, 317, 325, 330, 338,
// 341 and 342 are absent from this extract. In particular the statements
// that start each fBasicUrl rebuild and that append the URL, host and
// further header lines to the command are not visible here.
266 void TWebFile::SetMsgReadBuffer10(const char *redirectLocation, Bool_t tempRedirect)
267 {
268  TUrl oldUrl;
269  TString oldBasicUrl;
270 
271  if (redirectLocation) {
272  if (tempRedirect) { // temp redirect
273  fUrlOrg = fUrl;
275  } else { // permanent redirect
276  fUrlOrg = "";
277  fBasicUrlOrg = "";
278  }
279 
 // Remember the pre-redirect values so an existing command can be patched.
280  oldUrl = fUrl;
281  oldBasicUrl = fBasicUrl;
282 
283  fUrl.SetUrl(redirectLocation);
285  fBasicUrl += "://";
286  fBasicUrl += fUrl.GetHost();
287  fBasicUrl += ":";
288  fBasicUrl += fUrl.GetPort();
289  fBasicUrl += "/";
290  fBasicUrl += fUrl.GetFile();
291  // add query string again
292  TString rdl(redirectLocation);
293  if (rdl.Index("?") >= 0) {
294  rdl = rdl(rdl.Index("?"), rdl.Length());
295  fBasicUrl += rdl;
296  }
297  }
298 
299  if (fMsgReadBuffer10 != "") {
300  // patch up existing command
301  if (oldBasicUrl != "") {
302  // change to redirection location
303  fMsgReadBuffer10.ReplaceAll(oldBasicUrl, fBasicUrl);
304  fMsgReadBuffer10.ReplaceAll(TString("Host: ")+oldUrl.GetHost(), TString("Host: ")+fUrl.GetHost());
305  } else if (fBasicUrlOrg != "") {
306  // change back from temp redirection location
309  fUrl = fUrlOrg;
311  fUrlOrg = "";
312  fBasicUrlOrg = "";
313  }
314  }
315 
 // First use: derive the basic URL from fUrl.
316  if (fBasicUrl == "") {
318  fBasicUrl += "://";
319  fBasicUrl += fUrl.GetHost();
320  fBasicUrl += ":";
321  fBasicUrl += fUrl.GetPort();
322  fBasicUrl += "/";
323  fBasicUrl += fUrl.GetFile();
324  fBasicUrl += "?";
326  }
327 
 // No command yet (or it was reset): build it from scratch.
328  if (fMsgReadBuffer10 == "") {
329  fMsgReadBuffer10 = "GET ";
331  if (fHTTP11)
332  fMsgReadBuffer10 += " HTTP/1.1";
333  else
334  fMsgReadBuffer10 += " HTTP/1.0";
335  fMsgReadBuffer10 += "\r\n";
336  if (fHTTP11) {
337  fMsgReadBuffer10 += "Host: ";
339  fMsgReadBuffer10 += "\r\n";
340  }
343  fMsgReadBuffer10 += "\r\n";
344  fMsgReadBuffer10 += "Range: bytes=";
345  }
346 }
347 
348 ////////////////////////////////////////////////////////////////////////////////
349 /// Check if shell var "http_proxy" has been set and should be used.
350 
// NOTE(review): the function header (listing line 351) is absent from this
// extract; the Error()/Info() tags below name it "CheckProxy".
// Selects the proxy stored in fProxy. Order of precedence: none when
// fNoProxy is set, then a valid fgProxy, then the "http_proxy" environment
// variable, which must use the "http" protocol.
352 {
353  if (fNoProxy)
354  return;
355 
356  if (fgProxy.IsValid()) {
357  fProxy = fgProxy;
358  return;
359  }
360 
361  TString proxy = gSystem->Getenv("http_proxy");
362  if (proxy != "") {
363  TUrl p(proxy);
 // strcmp() is non-zero when the protocol is anything other than "http".
364  if (strcmp(p.GetProtocol(), "http")) {
365  Error("CheckProxy", "protocol must be HTTP in proxy URL %s",
366  proxy.Data());
367  return;
368  }
369  fProxy = p;
370  if (gDebug > 0)
371  Info("CheckProxy", "using HTTP proxy %s", fProxy.GetUrl());
372  }
373 }
374 
375 ////////////////////////////////////////////////////////////////////////////////
376 /// A TWebFile that has been correctly constructed is always considered open.
377 
// NOTE(review): the function header (listing line 378) is absent from this
// extract; presumably TWebFile::IsOpen() - confirm.
// Returns kTRUE unless the object is a zombie.
379 {
380  return IsZombie() ? kFALSE : kTRUE;
381 }
382 
383 ////////////////////////////////////////////////////////////////////////////////
384 /// Reopen a file with a different access mode, like from READ to
385 /// UPDATE or from NEW, CREATE, RECREATE, UPDATE to READ. Thus the
386 /// mode argument can be either "READ" or "UPDATE". The method returns
387 /// 0 in case the mode was successfully modified, 1 in case the mode
388 /// did not change (was already as requested or wrong input arguments)
389 /// and -1 in case of failure, in which case the file cannot be used
390 /// anymore. A TWebFile cannot be reopened in update mode.
391 
// NOTE(review): the function header (listing line 392) is absent from this
// extract; the Error() tags below name it "ReOpen" and the body reads a
// parameter called mode.
// The access mode is never changed: the body only reports an invalid or
// unsupported mode and then always returns 1.
393 {
394  TString opt = mode;
395  opt.ToUpper();
396 
397  if (opt != "READ" && opt != "UPDATE")
398  Error("ReOpen", "mode must be either READ or UPDATE, not %s", opt.Data());
399 
400  if (opt == "UPDATE")
401  Error("ReOpen", "update mode not allowed for a TWebFile");
402 
403  return 1;
404 }
405 
406 ////////////////////////////////////////////////////////////////////////////////
407 /// Read specified byte range from remote file via HTTP daemon. This
408 /// routine connects to the remote host, sends the request and returns
409 /// the buffer. Returns kTRUE in case of error.
410 
// NOTE(review): the function header (listing line 411) and listing line 427
// are absent from this extract; the body reads parameters buf and len.
// Reads len bytes at the current fOffset into buf and advances fOffset on
// success. Returns kTRUE on error, kFALSE otherwise.
412 {
413  Int_t st;
 // A non-zero result means the cache handled the request; 2 signals failure.
414  if ((st = ReadBufferViaCache(buf, len))) {
415  if (st == 2)
416  return kTRUE;
417  return kFALSE;
418  }
419 
 // Without mod_root on the server, use a standard HTTP range request.
420  if (!fHasModRoot)
421  return ReadBuffer10(buf, len);
422 
423  // Give full URL so Apache's virtual hosts solution works.
424  // Use protocol 0.9 for efficiency, we are not interested in the 1.0 headers.
425  if (fMsgReadBuffer == "") {
426  fMsgReadBuffer = "GET ";
428  fMsgReadBuffer += "?";
429  }
 // The request is the cached prefix followed by "<offset>:<length>".
430  TString msg = fMsgReadBuffer;
431  msg += fOffset;
432  msg += ":";
433  msg += len;
434  msg += "\r\n";
435 
436  if (GetFromWeb(buf, len, msg) == -1)
437  return kTRUE;
438 
439  fOffset += len;
440 
441  return kFALSE;
442 }
443 
444 ////////////////////////////////////////////////////////////////////////////////
445 /// Read specified byte range from remote file via HTTP daemon. This
446 /// routine connects to the remote host, sends the request and returns
447 /// the buffer. Returns kTRUE in case of error.
448 
// NOTE(review): the function header (listing line 449) is absent from this
// extract; the body reads parameters buf, pos and len.
// Positions the file at pos via SetOffset() and delegates to
// ReadBuffer(buf, len), whose result is returned unchanged.
450 {
451  SetOffset(pos);
452  return ReadBuffer(buf, len);
453 }
454 
455 ////////////////////////////////////////////////////////////////////////////////
456 /// Read specified byte range from remote file via HTTP 1.0 daemon (without
457 /// mod-root installed). This routine connects to the remote host, sends the
458 /// request and returns the buffer. Returns kTRUE in case of error.
459 
// NOTE(review): the function header (listing line 460) and listing lines
// 462, 464 and 470 are absent from this extract. They presumably declare
// msg and apos, which are used below without a visible declaration - confirm.
// Requests the byte range [fOffset, fOffset+len-1] through GetFromWeb10()
// and advances fOffset on success. Returns kTRUE on error.
461 {
463 
465  msg += fOffset;
466  msg += "-";
467  msg += fOffset+len-1;
468  msg += "\r\n\r\n";
469 
471 
472  // in case the server does not support segments, give it a chance to recover
 // (the single segment is passed along so it can be cut from a full response)
473  Int_t n = GetFromWeb10(buf, len, msg, 1, &apos, &len);
474  if (n == -1)
475  return kTRUE;
476  // The -2 error condition typically only happens when
477  // GetHead() failed because not implemented, in the first call to
478  // ReadBuffer() in Init(), it is not checked in ReadBuffers10().
479  if (n == -2) {
480  Error("ReadBuffer10", "%s does not exist", fBasicUrl.Data());
481  MakeZombie();
482  gDirectory = gROOT;
483  return kTRUE;
484  }
485 
486  fOffset += len;
487 
488  return kFALSE;
489 }
490 
491 ////////////////////////////////////////////////////////////////////////////////
492 /// Read specified byte ranges from remote file via HTTP daemon.
493 /// Reads the nbuf blocks described in arrays pos and len,
494 /// where pos[i] is the seek position of block i of length len[i].
495 /// Note that for nbuf=1, this call is equivalent to TFile::ReadBuffer
496 /// This function is overloaded by TNetFile, TWebFile, etc.
497 /// Returns kTRUE in case of failure.
498 
// Reads nbuf blocks, block i being len[i] bytes at pos[i], back to back
// into buf. Returns kTRUE on failure, kFALSE on success.
// Without mod_root the work is delegated to ReadBuffers10(); otherwise the
// blocks are requested as a comma separated list of "<offset>:<length>"
// pairs, split over several requests when the list grows too large.
// NOTE(review): listing line 508 is absent from this extract.
499 Bool_t TWebFile::ReadBuffers(char *buf, Long64_t *pos, Int_t *len, Int_t nbuf)
500 {
501  if (!fHasModRoot)
502  return ReadBuffers10(buf, pos, len, nbuf);
503 
504  // Give full URL so Apache's virtual hosts solution works.
505  // Use protocol 0.9 for efficiency, we are not interested in the 1.0 headers.
506  if (fMsgReadBuffer == "") {
507  fMsgReadBuffer = "GET ";
509  fMsgReadBuffer += "?";
510  }
511  TString msg = fMsgReadBuffer;
512 
 // k: write position in buf; n: bytes in the pending request;
 // cnt: number of blocks in the pending request.
513  Int_t k = 0, n = 0, cnt = 0;
514  for (Int_t i = 0; i < nbuf; i++) {
515  if (n) msg += ",";
516  msg += pos[i] + fArchiveOffset;
517  msg += ":";
518  msg += len[i];
519  n += len[i];
520  cnt++;
 // Flush once the request exceeds 8000 characters or holds 200 blocks.
521  if ((msg.Length() > 8000) || (cnt >= 200)) {
522  msg += "\r\n";
523  if (GetFromWeb(&buf[k], n, msg) == -1)
524  return kTRUE;
525  msg = fMsgReadBuffer;
526  k += n;
527  n = 0;
528  cnt = 0;
529  }
530  }
531 
532  msg += "\r\n";
533 
 // Send the remainder; GetFromWeb() returns 0 at once when n is 0.
534  if (GetFromWeb(&buf[k], n, msg) == -1)
535  return kTRUE;
536 
537  return kFALSE;
538 }
539 
540 ////////////////////////////////////////////////////////////////////////////////
541 /// Read specified byte ranges from remote file via HTTP 1.0 daemon (without
542 /// mod-root installed). Read the nbuf blocks described in arrays pos and len,
543 /// where pos[i] is the seek position of block i of length len[i].
544 /// Note that for nbuf=1, this call is equivalent to TFile::ReadBuffer
545 /// This function is overloaded by TNetFile, TWebFile, etc.
546 /// Returns kTRUE in case of failure.
547 
// Reads nbuf blocks, block i being len[i] bytes at pos[i], back to back
// into buf using "first-last" byte ranges sent through GetFromWeb10().
// Returns kTRUE on failure, kFALSE on success.
// NOTE(review): listing lines 549-553 are only partly present in this
// extract; they presumably set up fMsgReadBuffer10 and declare msg, which
// is used below without a visible declaration - confirm.
548 Bool_t TWebFile::ReadBuffers10(char *buf, Long64_t *pos, Int_t *len, Int_t nbuf)
549 {
551 
553 
 // k: write position in buf; n: bytes in the pending request;
 // cnt: number of blocks in the pending request.
554  Int_t k = 0, n = 0, r, cnt = 0;
555  for (Int_t i = 0; i < nbuf; i++) {
556  if (n) msg += ",";
557  msg += pos[i] + fArchiveOffset;
558  msg += "-";
559  msg += pos[i] + fArchiveOffset + len[i] - 1;
560  n += len[i];
561  cnt++;
 // Flush when the request is large, holds 200 blocks, or this is the last block.
562  if ((msg.Length() > 8000) || (cnt >= 200) || (i+1 == nbuf)) {
563  msg += "\r\n\r\n";
 // pos + (i+1-cnt) and len + (i+1-cnt) point at the first block of this batch.
564  r = GetFromWeb10(&buf[k], n, msg, cnt, pos + (i+1-cnt), len + (i+1-cnt));
 // Only -1 is treated as failure here; a -2 result is not checked.
565  if (r == -1)
566  return kTRUE;
567  msg = fMsgReadBuffer10;
568  k += n;
569  n = 0;
570  cnt = 0;
571  }
572  }
573 
574  return kFALSE;
575 }
576 
577 ////////////////////////////////////////////////////////////////////////////////
578 /// Extract requested segments from the cached content.
579 /// Such cache can be produced when server suddenly returns full data instead of segments
580 /// Returns -1 in case of error, 0 in case of success
581 
582 Int_t TWebFile::GetFromCache(char *buf, Int_t len, Int_t nseg, Long64_t *seg_pos, Int_t *seg_len)
583 {
584  if (!fFullCache) return -1;
585 
586  if (gDebug > 0)
587  Info("GetFromCache", "Extract %d segments total len %d from cached data", nseg, len);
588 
589  Int_t curr = 0;
590  for (Int_t cnt=0;cnt<nseg;cnt++) {
591  // check that target buffer has enough space
592  if (curr + seg_len[cnt] > len) return -1;
593  // check that segment is inside cached area
594  if (fArchiveOffset + seg_pos[cnt] + seg_len[cnt] > fFullCacheSize) return -1;
595  char* src = (char*) fFullCache + fArchiveOffset + seg_pos[cnt];
596  memcpy(buf + curr, src, seg_len[cnt]);
597  curr += seg_len[cnt];
598  }
599 
600  return 0;
601 }
602 
603 ////////////////////////////////////////////////////////////////////////////////
604 /// Read request from web server. Returns -1 in case of error,
605 /// 0 in case of success.
606 
// Sends the request msg over a fresh connection and reads len bytes of the
// reply into buf. The connection goes to the proxy when a valid one is set,
// otherwise to fUrl, and is closed again before returning.
// Returns 0 on success (also when len is 0) and -1 on error.
// NOTE(review): listing lines 654-655 (the R__WIN32 branch of the statistics
// update) are absent from this extract.
607 Int_t TWebFile::GetFromWeb(char *buf, Int_t len, const TString &msg)
608 {
609  TSocket *s;
610 
611  if (!len) return 0;
612 
 // Start time is only taken when performance statistics are collected.
613  Double_t start = 0;
614  if (gPerfStats) start = TTimeStamp();
615 
616  TUrl connurl;
617  if (fProxy.IsValid())
618  connurl = fProxy;
619  else
620  connurl = fUrl;
621 
622  if (strcmp(connurl.GetProtocol(), "https") == 0) {
623 #ifdef R__SSL
624  s = new TSSLSocket(connurl.GetHost(), connurl.GetPort());
625 #else
626  Error("GetFromWeb", "library compiled without SSL, https not supported");
627  return -1;
628 #endif
629  } else
630  s = new TSocket(connurl.GetHost(), connurl.GetPort());
631 
632  if (!s->IsValid()) {
633  Error("GetFromWeb", "cannot connect to host %s", fUrl.GetHost());
634  delete s;
635  return -1;
636  }
637 
638  if (s->SendRaw(msg.Data(), msg.Length()) == -1) {
639  Error("GetFromWeb", "error sending command to host %s", fUrl.GetHost());
640  delete s;
641  return -1;
642  }
643 
 // NOTE(review): only a result of -1 is treated as an error; the number of
 // bytes actually received is not compared with len.
644  if (s->RecvRaw(buf, len) == -1) {
645  Error("GetFromWeb", "error receiving data from host %s", fUrl.GetHost());
646  delete s;
647  return -1;
648  }
649 
650  // collect statistics
651  fBytesRead += len;
652  fReadCalls++;
653 #ifdef R__WIN32
656 #else
657  fgBytesRead += len;
658  fgReadCalls++;
659 #endif
660 
661  if (gPerfStats)
662  gPerfStats->FileReadEvent(this, len, start);
663 
664  delete s;
665  return 0;
666 }
667 
668 ////////////////////////////////////////////////////////////////////////////////
669 /// Read multiple byte range request from web server.
670 /// Uses HTTP 1.0 daemon without mod-root.
671 /// Returns -2 in case file does not exist, -1 in case
672 /// of error and 0 in case of success.
673 
// Sends the range request msg and parses the reply header by header.
//   buf, len         - destination buffer and the total number of bytes expected
//   msg              - complete HTTP request to send
//   nseg             - number of requested segments
//   seg_pos, seg_len - positions and lengths of those segments; needed to
//                      cut them out of a response that carries the full file
// Returns 0 on success, -1 on error and -2 when the server answers 404.
// An empty header line (n == 0) triggers the action selected by the headers
// seen so far: follow a redirect, read one byte range, or read the whole
// file when the server answered with code 200.
// NOTE(review): listing lines 940, 956 and 957 are absent from this extract.
674 Int_t TWebFile::GetFromWeb10(char *buf, Int_t len, const TString &msg, Int_t nseg, Long64_t *seg_pos, Int_t *seg_len)
675 {
676  if (!len) return 0;
677 
678  // if file content was cached, reuse it
679  if (fFullCache && (nseg>0))
680  return GetFromCache(buf, len, nseg, seg_pos, seg_len);
681 
682  Double_t start = 0;
683  if (gPerfStats) start = TTimeStamp();
684 
685  // open fSocket and close it when going out of scope
686  TWebSocket ws(this);
687 
688  if (!fSocket || !fSocket->IsValid()) {
689  Error("GetFromWeb10", "cannot connect to host %s", fUrl.GetHost());
690  return -1;
691  }
692 
693  if (gDebug > 0)
694  Info("GetFromWeb10", "sending HTTP request:\n%s", msg.Data());
695 
696  if (fSocket->SendRaw(msg.Data(), msg.Length()) == -1) {
697  Error("GetFromWeb10", "error sending command to host %s", fUrl.GetHost());
698  return -1;
699  }
700 
 // ltot: bytes stored in buf so far; first/last: byte range announced by the
 // latest Content-Range header; fullsize: -200 marks "code 200 seen", a
 // positive value is the Content-Length of a full-file response.
701  char line[8192];
702  Int_t n, ret = 0, nranges = 0, ltot = 0, redirect = 0;
703  TString boundary, boundaryEnd;
704  Long64_t first = -1, last = -1, tot, fullsize = 0;
705  TString redir;
706 
707  while ((n = GetLine(fSocket, line, sizeof(line))) >= 0) {
708  if (n == 0) {
709  if (ret < 0)
710  return ret;
711  if (redirect) {
712  if (redir.IsNull()) {
713  // Some sites (s3.amazonaws.com) do not return a Location field on 301
714  Error("GetFromWeb10", "error - redirect without location from host %s", fUrl.GetHost());
715  return -1;
716  }
717 
718  ws.ReOpen();
719  // set message to reflect the redirectLocation and add bytes field
 // NOTE(review): the retry requests fOffset..fOffset+len-1 and does not pass
 // nseg/seg_pos/seg_len on - confirm this is right for multi-segment reads.
720  TString msg_1 = fMsgReadBuffer10;
721  msg_1 += fOffset;
722  msg_1 += "-";
723  msg_1 += fOffset+len-1;
724  msg_1 += "\r\n\r\n";
725  return GetFromWeb10(buf, len, msg_1);
726  }
727 
728  if (first >= 0) {
 // NOTE(review): ll comes from the server's Content-Range header and is not
 // checked against the space left in buf (len - ltot) before RecvRaw().
729  Int_t ll = Int_t(last - first) + 1;
730  Int_t rsize;
731  if ((rsize = fSocket->RecvRaw(&buf[ltot], ll)) == -1) {
732  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
733  return -1;
734  }
735  else if (ll != rsize) {
736  Error("GetFromWeb10", "expected %d bytes, got %d", ll, rsize);
737  return -1;
738  }
739  ltot += ll;
740 
741  first = -1;
742 
743  if (boundary == "")
744  break; // not a multipart response
745  }
746 
747  if (fullsize > 0) {
748 
749  if (nseg <= 0) {
750  Error("GetFromWeb10","Need segments data to extract parts from full size %lld", fullsize);
751  return -1;
752  }
753 
754  if (len > fullsize) {
755  Error("GetFromWeb10","Requested part %d longer than full size %lld", len, fullsize);
756  return -1;
757  }
758 
759  if ((fFullCache == 0) && (fullsize <= GetMaxFullCacheSize())) {
760  // try to read file content into cache and then reuse it, limit cache by 2 GB
761  fFullCache = malloc(fullsize);
762  if (fFullCache != 0) {
763  if (fSocket->RecvRaw(fFullCache, fullsize) != fullsize) {
764  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
765  free(fFullCache); fFullCache = 0;
766  return -1;
767  }
768  fFullCacheSize = fullsize;
769  return GetFromCache(buf, len, nseg, seg_pos, seg_len);
770  }
771  // when cache allocation failed, try without cache
772  }
773 
774  // check all segments are inside range and in sorted order
775  for (Int_t cnt=0;cnt<nseg;cnt++) {
776  if (fArchiveOffset + seg_pos[cnt] + seg_len[cnt] > fullsize) {
777  Error("GetFromWeb10","Requested segment %lld len %d is outside of full range %lld", seg_pos[cnt], seg_len[cnt], fullsize);
778  return -1;
779  }
780  if ((cnt>0) && (seg_pos[cnt-1] + seg_len[cnt-1] > seg_pos[cnt])) {
781  Error("GetFromWeb10","Requested segments are not in sorted order");
782  return -1;
783  }
784  }
785 
 // pos: number of bytes of the full file consumed from the socket so far.
786  Long64_t pos = 0;
787  char* curr = buf;
788  char dbuf[2048]; // dummy buffer for skip data
789 
790  // now read complete file and take only requested segments into the buffer
791  for (Int_t cnt=0; cnt<nseg; cnt++) {
792  // first skip data before segment
793  while (pos < fArchiveOffset + seg_pos[cnt]) {
794  Long64_t ll = fArchiveOffset + seg_pos[cnt] - pos;
795  if (ll > Int_t(sizeof(dbuf))) ll = sizeof(dbuf);
796  if (fSocket->RecvRaw(dbuf, ll) != ll) {
797  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
798  return -1;
799  }
800  pos += ll;
801  }
802 
803  // reading segment itself
804  if (fSocket->RecvRaw(curr, seg_len[cnt]) != seg_len[cnt]) {
805  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
806  return -1;
807  }
808  curr += seg_len[cnt];
809  pos += seg_len[cnt];
810  ltot += seg_len[cnt];
811  }
812 
813  // now read file to the end
814  while (pos < fullsize) {
815  Long64_t ll = fullsize - pos;
816  if (ll > Int_t(sizeof(dbuf))) ll = sizeof(dbuf);
817  if (fSocket->RecvRaw(dbuf, ll) != ll) {
818  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
819  return -1;
820  }
821  pos += ll;
822  }
823 
824  if (gDebug>0) Info("GetFromWeb10","Complete reading %d bytes in %d segments out of full size %lld", len, nseg, fullsize);
825 
826  break;
827  }
828 
829  continue;
830  }
831 
832  if (gDebug > 0)
833  Info("GetFromWeb10", "header: %s", line);
834 
 // The closing multipart boundary ends the response.
835  if (boundaryEnd == line) {
836  if (gDebug > 0)
837  Info("GetFromWeb10", "got all headers");
838  break;
839  }
840  if (boundary == line) {
841  nranges++;
842  if (gDebug > 0)
843  Info("GetFromWeb10", "get new multipart byte range (%d)", nranges);
844  }
845 
846  TString res = line;
847 
848  if (res.BeginsWith("HTTP/1.")) {
849  if (res.BeginsWith("HTTP/1.1")) {
 // First HTTP/1.1 answer: clear the cached command so it gets rebuilt.
850  if (!fHTTP11)
851  fMsgReadBuffer10 = "";
852  fHTTP11 = kTRUE;
853  }
 // The three-digit status code follows "HTTP/1.x ".
854  TString scode = res(9, 3);
855  Int_t code = scode.Atoi();
856  if (code >= 500) {
857  ret = -1;
858  TString mess = res(13, 1000);
859  Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
860  } else if (code >= 400) {
861  if (code == 404)
862  ret = -2; // file does not exist
863  else {
864  ret = -1;
865  TString mess = res(13, 1000);
866  Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
867  }
868  } else if (code >= 300) {
869  if (code == 301 || code == 303) {
870  redirect = 1; // permanent redirect
871  } else if (code == 302 || code == 307) {
872  // treat 302 as 303: permanent redirect
873  redirect = 1;
874  //redirect = 2; // temp redirect
875  } else {
876  ret = -1;
877  TString mess = res(13, 1000);
878  Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
879  }
880  } else if (code > 200) {
881  if (code != 206) {
882  ret = -1;
883  TString mess = res(13, 1000);
884  Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
885  }
886  } else if (code == 200) {
887  fullsize = -200; // make indication of code 200
888  Warning("GetFromWeb10",
889  "Server %s response with complete file, but only part of it was requested.\n"
890  "Check MaxRanges configuration parameter (if Apache is used)",
891  fUrl.GetHost());
892 
893  }
894  } else if (res.BeginsWith("Content-Type: multipart")) {
 // Multipart parts start with "--<boundary>", the response ends with "--<boundary>--".
895  boundary = res(res.Index("boundary=")+9, 1000);
896  if (boundary[0]=='"' && boundary[boundary.Length()-1]=='"') {
897  boundary = boundary(1,boundary.Length()-2);
898  }
899  boundary = "--" + boundary;
900  boundaryEnd = boundary + "--";
901  } else if (res.BeginsWith("Content-range:")) {
 // NOTE(review): the sscanf() result is not checked, so tot may be
 // unset when it is copied into fSize below.
902 #ifdef R__WIN32
903  sscanf(res.Data(), "Content-range: bytes %I64d-%I64d/%I64d", &first, &last, &tot);
904 #else
905  sscanf(res.Data(), "Content-range: bytes %lld-%lld/%lld", &first, &last, &tot);
906 #endif
907  if (fSize == -1) fSize = tot;
908  } else if (res.BeginsWith("Content-Range:")) {
909 #ifdef R__WIN32
910  sscanf(res.Data(), "Content-Range: bytes %I64d-%I64d/%I64d", &first, &last, &tot);
911 #else
912  sscanf(res.Data(), "Content-Range: bytes %lld-%lld/%lld", &first, &last, &tot);
913 #endif
914  if (fSize == -1) fSize = tot;
915  } else if (res.BeginsWith("Content-Length:") && (fullsize == -200)) {
 // Only honoured after a code 200 answer, i.e. for a full-file response.
916 #ifdef R__WIN32
917  sscanf(res.Data(), "Content-Length: %I64d", &fullsize);
918 #else
919  sscanf(res.Data(), "Content-Length: %lld", &fullsize);
920 #endif
921  } else if (res.BeginsWith("Location:") && redirect) {
922  redir = res(10, 1000);
923  if (redirect == 2) // temp redirect
924  SetMsgReadBuffer10(redir, kTRUE);
925  else // permanent redirect
926  SetMsgReadBuffer10(redir, kFALSE);
927  }
928  }
929 
930  if (redirect && redir.IsNull()) {
931  ret = -1;
932  Error("GetFromWeb10", "error - redirect without location from host %s", fUrl.GetHost());
933  }
934 
 // GetLine() returned -1: with HTTP/1.1 the kept-open socket is assumed to
 // have been closed by the server, so reconnect and retry.
 // NOTE(review): the retry does not pass nseg/seg_pos/seg_len on, and ret,
 // set just above, is not examined before retrying - confirm.
935  if (n == -1 && fHTTP11) {
936  if (gDebug > 0)
937  Info("GetFromWeb10", "HTTP/1.1 socket closed, reopen");
938  if (fBasicUrlOrg != "") {
939  // if we have to close temp redirection, set back to original url
941  }
942  ws.ReOpen();
943  return GetFromWeb10(buf, len, msg);
944  }
945 
946  if (ltot != len) {
947  Error("GetFromWeb10", "error receiving expected amount of data (got %d, expected %d) from host %s",
948  ltot, len, fUrl.GetHost());
949  return -1;
950  }
951 
952  // collect statistics
953  fBytesRead += len;
954  fReadCalls++;
955 #ifdef R__WIN32
958 #else
959  fgBytesRead += len;
960  fgReadCalls++;
961 #endif
962 
963  if (gPerfStats)
964  gPerfStats->FileReadEvent(this, len, start);
965 
966  return 0;
967 }
968 
969 ////////////////////////////////////////////////////////////////////////////////
970 /// Set position from where to start reading.
971 
// NOTE(review): the function header (listing line 972) is absent from this
// extract; the Error() tag below names it "Seek" and the body reads
// parameters offset and pos.
// Updates fOffset only; no request is sent to the server here.
973 {
974  switch (pos) {
975  case kBeg:
 // absolute position, shifted by the archive offset
976  fOffset = offset + fArchiveOffset;
977  break;
978  case kCur:
979  fOffset += offset;
980  break;
981  case kEnd:
982  // this option is not used currently in the ROOT code
 // NOTE(review): after reporting the error, fOffset is still updated below.
983  if (fArchiveOffset)
984  Error("Seek", "seeking from end in archive is not (yet) supported");
985  fOffset = fEND - offset; // is fEND really EOF or logical EOF?
986  break;
987  }
988 }
989 
990 ////////////////////////////////////////////////////////////////////////////////
991 /// Return maximum file size.
992 
// NOTE(review): the function header (listing line 993) is absent from this
// extract; presumably TWebFile::GetSize() const, given the const_cast
// below - confirm.
// Returns the cached fSize when the server has no mod_root or the size is
// already known; otherwise asks the server with a "?-1" request, stores the
// answer in fSize and returns it. Returns kMaxInt when that request fails.
994 {
995  if (!fHasModRoot || fSize >= 0)
996  return fSize;
997 
998  Long64_t size;
999  char asize[64];
1000 
1001  TString msg = "GET ";
1002  msg += fBasicUrl;
1003  msg += "?";
1004  msg += -1;
1005  msg += "\r\n";
1006 
 // NOTE(review): asize is not zero-initialised and is parsed as a string
 // below; this relies on the reply containing a terminator - confirm.
1007  if (const_cast<TWebFile*>(this)->GetFromWeb(asize, 64, msg) == -1)
1008  return kMaxInt;
1009 
1010 #ifndef R__WIN32
1011  size = atoll(asize);
1012 #else
1013  size = _atoi64(asize);
1014 #endif
1015 
1016  fSize = size;
1017 
1018  return size;
1019 }
1020 
1021 ////////////////////////////////////////////////////////////////////////////////
1022 /// Get the HTTP header. Depending on the return code we can see if
1023 /// the file exists and if the server uses mod_root.
1024 /// Returns -1 in case of an error, -2 in case the file does not exist,
1025 /// -3 in case HEAD is not supported (dCache HTTP door) and
1026 /// 0 in case of success.
1027 
1029 {
1030  // Give full URL so Apache's virtual hosts solution works.
1031  if (fMsgGetHead == "") {
1032  fMsgGetHead = "HEAD ";
1034  if (fHTTP11)
1035  fMsgGetHead += " HTTP/1.1";
1036  else
1037  fMsgGetHead += " HTTP/1.0";
1038  fMsgGetHead += "\r\n";
1039  if (fHTTP11) {
1040  fMsgGetHead += "Host: ";
1041  fMsgGetHead += fUrl.GetHost();
1042  fMsgGetHead += "\r\n";
1043  }
1046  fMsgGetHead += "\r\n\r\n";
1047  }
1048  TString msg = fMsgGetHead;
1049 
1050  TUrl connurl;
1051  if (fProxy.IsValid())
1052  connurl = fProxy;
1053  else
1054  connurl = fUrl;
1055 
1056  TSocket *s = 0;
1057  for (Int_t i = 0; i < 5; i++) {
1058  if (strcmp(connurl.GetProtocol(), "https") == 0) {
1059 #ifdef R__SSL
1060  s = new TSSLSocket(connurl.GetHost(), connurl.GetPort());
1061 #else
1062  Error("GetHead", "library compiled without SSL, https not supported");
1063  return -1;
1064 #endif
1065  } else
1066  s = new TSocket(connurl.GetHost(), connurl.GetPort());
1067 
1068  if (!s->IsValid()) {
1069  delete s;
1070  if (gSystem->GetErrno() == EADDRINUSE || gSystem->GetErrno() == EISCONN) {
1071  s = 0;
1072  gSystem->Sleep(i*10);
1073  } else {
1074  Error("GetHead", "cannot connect to host %s (errno=%d)", fUrl.GetHost(),
1075  gSystem->GetErrno());
1076  return -1;
1077  }
1078  } else
1079  break;
1080  }
1081  if (!s)
1082  return -1;
1083 
1084  if (gDebug > 0) {
1085  Info("GetHead", "connected to host %s", connurl.GetHost());
1086  Info("GetHead", "sending HTTP request:\n%s", msg.Data());
1087  }
1088 
1089  if (s->SendRaw(msg.Data(), msg.Length()) == -1) {
1090  Error("GetHead", "error sending command to host %s", fUrl.GetHost());
1091  delete s;
1092  return -1;
1093  }
1094 
1095  char line[8192];
1096  Int_t n, ret = 0, redirect = 0;
1097  TString redir;
1098 
1099  while ((n = GetLine(s, line, sizeof(line))) >= 0) {
1100  if (n == 0) {
1101  if (gDebug > 0)
1102  Info("GetHead", "got all headers");
1103  delete s;
1104  if (fBasicUrlOrg != "" && !redirect) {
1105  // set back to original url in case of temp redirect
1107  fMsgGetHead = "";
1108  }
1109  if (ret < 0)
1110  return ret;
1111  if (redirect) {
1112  if (redir.IsNull()) {
1113  // Some sites (s3.amazonaws.com) do not return a Location field on 301
1114  Error("GetHead", "error - redirect without location from host %s", fUrl.GetHost());
1115  return -1;
1116  }
1117  return GetHead();
1118  }
1119  return 0;
1120  }
1121 
1122  if (gDebug > 0)
1123  Info("GetHead", "header: %s", line);
1124 
1125  TString res = line;
1126  ProcessHttpHeader(res);
1127  if (res.BeginsWith("HTTP/1.")) {
1128  if (res.BeginsWith("HTTP/1.1")) {
1129  if (!fHTTP11) {
1130  fMsgGetHead = "";
1131  fMsgReadBuffer10 = "";
1132  }
1133  fHTTP11 = kTRUE;
1134  }
1135  TString scode = res(9, 3);
1136  Int_t code = scode.Atoi();
1137  if (code >= 500) {
1138  if (code == 500)
1139  fHasModRoot = kTRUE;
1140  else {
1141  ret = -1;
1142  TString mess = res(13, 1000);
1143  Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
1144  }
1145  } else if (code >= 400) {
1146  if (code == 400)
1147  ret = -3; // command not supported
1148  else if (code == 404)
1149  ret = -2; // file does not exist
1150  else {
1151  ret = -1;
1152  TString mess = res(13, 1000);
1153  Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
1154  }
1155  } else if (code >= 300) {
1156  if (code == 301 || code == 303)
1157  redirect = 1; // permanent redirect
1158  else if (code == 302 || code == 307)
1159  redirect = 2; // temp redirect
1160  else {
1161  ret = -1;
1162  TString mess = res(13, 1000);
1163  Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
1164  }
1165  } else if (code > 200) {
1166  ret = -1;
1167  TString mess = res(13, 1000);
1168  Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
1169  }
1170  } else if (res.BeginsWith("Content-Length:")) {
1171  TString slen = res(16, 1000);
1172  fSize = slen.Atoll();
1173  } else if (res.BeginsWith("Location:") && redirect) {
1174  redir = res(10, 1000);
1175  if (redirect == 2) // temp redirect
1176  SetMsgReadBuffer10(redir, kTRUE);
1177  else // permanent redirect
1178  SetMsgReadBuffer10(redir, kFALSE);
1179  fMsgGetHead = "";
1180  }
1181  }
1182 
1183  delete s;
1184 
1185  return ret;
1186 }
1187 
1188 ////////////////////////////////////////////////////////////////////////////////
1189 /// Read a line from the socket. Reads at most one less than the number of
1190 /// characters specified by maxsize. Reading stops when a newline character
1191 /// is found. The newline (\n) and cr (\r), if any, are removed.
1192 /// Returns -1 in case of error, or the number of characters read (>= 0)
1193 /// otherwise.
1194 
1196 {
1197  Int_t n = GetHunk(s, line, maxsize);
1198  if (n < 0) {
1199  if (!fHTTP11 || gDebug > 0)
1200  Error("GetLine", "error receiving data from host %s", fUrl.GetHost());
1201  return -1;
1202  }
1203 
1204  if (n > 0 && line[n-1] == '\n') {
1205  n--;
1206  if (n > 0 && line[n-1] == '\r')
1207  n--;
1208  line[n] = '\0';
1209  }
1210  return n;
1211 }
1212 
1213 ////////////////////////////////////////////////////////////////////////////////
1214 /// Read a hunk of data from the socket, up until a terminator. The hunk is
1215 /// limited by whatever the TERMINATOR callback chooses as its
1216 /// terminator. For example, if terminator stops at newline, the hunk
1217 /// will consist of a line of data; if terminator stops at two
1218 /// newlines, it can be used to read the head of an HTTP response.
1219 /// Upon determining the boundary, the function returns the data (up to
1220 /// the terminator) in hunk.
1221 ///
1222 /// In case of read error, -1 is returned. In case of having read some
1223 /// data, but encountering EOF before seeing the terminator, the data
1224 /// that has been read is returned, but it will (obviously) not contain the
1225 /// terminator.
1226 ///
1227 /// The TERMINATOR function is called with three arguments: the
1228 /// beginning of the data read so far, the beginning of the current
1229 /// block of peeked-at data, and the length of the current block.
1230 /// Depending on its needs, the function is free to choose whether to
1231 /// analyze all data or just the newly arrived data. If TERMINATOR
1232 /// returns 0, it means that the terminator has not been seen.
1233 /// Otherwise it should return a pointer to the character immediately
1234 /// following the terminator.
1235 ///
1236 /// The idea is to be able to read a line of input, or otherwise a hunk
1237 /// of text, such as the head of an HTTP request, without crossing the
1238 /// boundary, so that the next call to RecvRaw() etc. reads the data
1239 /// after the hunk. To achieve that, this function does the following:
1240 ///
1241 /// 1. Peek at incoming data.
1242 ///
1243 /// 2. Determine whether the peeked data, along with the previously
1244 /// read data, includes the terminator.
1245 ///
1246 /// 3a. If yes, read the data until the end of the terminator, and
1247 /// exit.
1248 ///
1249 /// 3b. If no, read the peeked data and goto 1.
1250 ///
1251 /// The function is careful to assume as little as possible about the
1252 /// implementation of peeking. For example, every peek is followed by
1253 /// a read. If the read returns a different amount of data, the
1254 /// process is retried until all data arrives safely.
1255 ///
1256 /// Reads at most one less than the number of characters specified by maxsize.
1257 
1258 Int_t TWebFile::GetHunk(TSocket *s, char *hunk, Int_t maxsize)
1259 {
1260  if (maxsize <= 0) return 0; // no room to store anything: report 0 bytes, HUNK is left untouched
1261 
1262  Int_t bufsize = maxsize; // capacity of HUNK; its last byte is kept for the trailing '\0'
1263  Int_t tail = 0; // tail position in HUNK, i.e. number of bytes consumed so far
1264 
1265  while (1) {
1266  const char *end; // position just past the terminator, or 0 when none was seen
1267  Int_t pklen, rdlen, remain; // bytes peeked, bytes actually read, bytes we intend to read
1268 
1269  // First, peek at the available data (at most the free space left in HUNK).
1270  pklen = s->RecvRaw(hunk+tail, bufsize-1-tail, kPeek);
1271  if (pklen < 0) { // the peek itself failed
1272  return -1;
1273  }
1274  end = HttpTerminator(hunk, hunk+tail, pklen);
1275  if (end) {
1276  // The data contains the terminator: we'll drain the data up
1277  // to the end of the terminator.
1278  remain = end - (hunk + tail); // bytes still to consume, terminator included
1279  if (remain == 0) {
1280  // No more data needs to be read.
1281  hunk[tail] = '\0';
1282  return tail;
1283  }
1284  if (bufsize - 1 < tail + remain) { // terminator would end beyond the usable part of HUNK
1285  Error("GetHunk", "hunk buffer too small for data from host %s (%d bytes needed)",
1286  fUrl.GetHost(), tail + remain + 1);
1287  hunk[tail] = '\0';
1288  return -1;
1289  }
1290  } else {
1291  // No terminator: simply read the data we know is (or should
1292  // be) available.
1293  remain = pklen;
1294  }
1295 
1296  // Now, read the data. Note that we make no assumptions about
1297  // how much data we'll get. (Some TCP stacks are notorious for
1298  // read returning less data than the previous MSG_PEEK.)
1299  rdlen = s->RecvRaw(hunk+tail, remain, kDontBlock);
1300  if (rdlen < 0) { // the read failed
1301  return -1;
1302  }
1303  tail += rdlen;
1304  hunk[tail] = '\0'; // keep HUNK NUL-terminated after every successful read
1305 
1306  if (rdlen == 0) {
1307  if (tail == 0) {
1308  // EOF without anything having been read (returns 0)
1309  return tail;
1310  } else {
1311  // EOF seen: return the data we've read (no terminator in it).
1312  return tail;
1313  }
1314  }
1315  if (end && rdlen == remain) {
1316  // The terminator was seen and the remaining data drained --
1317  // we got what we came for.
1318  return tail;
1319  }
1320 
1321  // Keep looping until all the data arrives.
1322 
1323  if (tail == bufsize - 1) { // usable part of HUNK is full and the terminator was not drained
1324  Error("GetHunk", "hunk buffer too small for data from host %s",
1325  fUrl.GetHost());
1326  return -1;
1327  }
1328  }
1329 }
1330 
1331 ////////////////////////////////////////////////////////////////////////////////
1332 /// Determine whether the peeked data [PEEKED, PEEKED + PEEKLEN) contains the
1333 /// end of an HTTP line, [\\r]\\n. Only the '\\n' is searched for. If found, return
1334 /// the pointer to the position after it, otherwise return 0. This is used as
1335 /// callback to GetHunk(). The data between START and PEEKED has been read and
1336 /// cannot be "unread" and is not rescanned; the data after PEEKED has only been peeked.
1337 
1338 const char *TWebFile::HttpTerminator(const char *start, const char *peeked,
1339  Int_t peeklen)
1340 {
1341 #if 0
1342  const char *p, *end;
1343 
1344  // Look for "[\r]\n", and return the following position if found.
1345  // Start one char before the current to cover the possibility that
1346  // part of the terminator (e.g. "\r") arrived in the previous batch.
1347  p = peeked - start < 1 ? start : peeked - 1;
1348  end = peeked + peeklen;
1349 
1350  // Check for \r\n anywhere in [p, end-2).
1351  for (; p < end - 1; p++)
1352  if (p[0] == '\r' && p[1] == '\n')
1353  return p + 2;
1354 
1355  // p==end-1: check for \r\n directly preceding END.
1356  if (p[0] == '\r' && p[1] == '\n')
1357  return p + 2;
1358 #else
1359  if (start) { } // start unused in this branch, silence compiler
1360  const char *p = (const char*) memchr(peeked, '\n', peeklen); // scan only the newly peeked bytes
1361  if (p)
1362  // p+1 because the line must include '\n'
1363  return p + 1;
1364 #endif
1365  return 0; // no terminator found in the peeked bytes
1366 }
1367 
1368 ////////////////////////////////////////////////////////////////////////////////
1369 /// Return basic authentication scheme, to be added to the request.
1370 
1372 {
1373  TString msg;
1374  if (strlen(fUrl.GetUser())) {
1375  TString auth = fUrl.GetUser();
1376  if (strlen(fUrl.GetPasswd())) {
1377  auth += ":";
1378  auth += fUrl.GetPasswd();
1379  }
1380  msg += "Authorization: Basic ";
1381  msg += TBase64::Encode(auth);
1382  msg += "\r\n";
1383  }
1384  return msg;
1385 }
1386 
1387 ////////////////////////////////////////////////////////////////////////////////
1388 /// Static method setting global proxy URL.
1389 
1390 void TWebFile::SetProxy(const char *proxy)
1391 {
1392  if (proxy && *proxy) { // a null or empty string leaves the current proxy unchanged
1393  TUrl p(proxy);
1394  if (strcmp(p.GetProtocol(), "http")) { // non-zero: parsed protocol is not exactly "http"
1395  :: Error("TWebFile::SetProxy", "protocol must be HTTP in proxy URL %s",
1396  proxy);
1397  return; // rejected: fgProxy keeps its previous value
1398  }
1399  fgProxy = p; // accept the URL as the new global proxy
1400  }
1401 }
1402 
1403 ////////////////////////////////////////////////////////////////////////////////
1404 /// Static method returning the global proxy URL.
1405 
1406 const char *TWebFile::GetProxy()
1407 {
1408  if (fgProxy.IsValid())
1409  return fgProxy.GetUrl(); // full URL string of the global proxy
1410  return ""; // no valid global proxy: empty string rather than a null pointer
1411 }
1412 
1413 ////////////////////////////////////////////////////////////////////////////////
1414 /// Process the HTTP header in the argument. This method is intended to be
1415 /// overwritten by subclasses that exploit the information contained in the
1416 /// HTTP headers.
1417 
1419 {
1420 }
1421 
1422 ////////////////////////////////////////////////////////////////////////////////
1423 /// Static method returning maximal size of full cache,
1424 /// which can be preserved by file instance
1425 
1427 {
1428  return fgMaxFullCacheSize;
1429 }
1430 
1431 ////////////////////////////////////////////////////////////////////////////////
1432 /// Static method, set maximal size of full cache,
1433 /// which can be preserved by file instance
1434 
1436 {
1437  fgMaxFullCacheSize = sz;
1438 }
1439 
1440 
1441 ////////////////////////////////////////////////////////////////////////////////
1442 /// Create helper class that allows directory access via httpd.
1443 /// The name must start with '-' to bypass the TSystem singleton check.
1444 
1445 TWebSystem::TWebSystem() : TSystem("-http", "HTTP Helper System")
1446 {
1447  SetName("http"); // replace the "-http" construction name with the plain name
1448 
1449  fDirp = 0; // no directory handle is held initially
1450 }
1451 
1452 ////////////////////////////////////////////////////////////////////////////////
1453 /// Make a directory via httpd. Not supported.
1454 
1456 {
1457  return -1;
1458 }
1459 
1460 ////////////////////////////////////////////////////////////////////////////////
1461 /// Open a directory via httpd. Returns an opaque pointer to a dir
1462 /// structure. Returns 0 in case of error.
1463 
1464 void *TWebSystem::OpenDirectory(const char *)
1465 {
1466  if (fDirp) { // a handle left over from an earlier call is reported and dropped
1467  Error("OpenDirectory", "invalid directory pointer (should never happen)");
1468  fDirp = 0;
1469  }
1470 
1471  fDirp = 0; // not implemented for the time being
1472 
1473  return fDirp; // always 0 here, since fDirp was just cleared
1474 }
1475 
1476 ////////////////////////////////////////////////////////////////////////////////
1477 /// Free directory via httpd.
1478 
1480 {
1481  if (dirp != fDirp) {
1482  Error("FreeDirectory", "invalid directory pointer (should never happen)");
1483  return;
1484  }
1485 
1486  fDirp = 0;
1487 }
1488 
1489 ////////////////////////////////////////////////////////////////////////////////
1490 /// Get directory entry via httpd. Returns 0 in case no more entries.
1491 
1492 const char *TWebSystem::GetDirEntry(void *dirp)
1493 {
1494  if (dirp != fDirp) { // only the handle currently stored in fDirp is accepted
1495  Error("GetDirEntry", "invalid directory pointer (should never happen)");
1496  return 0;
1497  }
1498 
1499  return 0; // no entry is ever produced: 0 means "no more entries"
1500 }
1501 
1502 ////////////////////////////////////////////////////////////////////////////////
1503 /// Get info about a file. Info is returned in the form of a FileStat_t
1504 /// structure (see TSystem.h).
1505 /// The function returns 0 in case of success and 1 if the file could
1506 /// not be stat'ed.
1507 
1509 {
1510  TWebFile *f = new TWebFile(path, "HEADONLY");
1511 
1512  if (f->fWritten == 0) {
1513 
1514  buf.fDev = 0;
1515  buf.fIno = 0;
1516  buf.fMode = 0;
1517  buf.fUid = 0;
1518  buf.fGid = 0;
1519  buf.fSize = f->GetSize();
1520  buf.fMtime = 0;
1521  buf.fIsLink = kFALSE;
1522 
1523  delete f;
1524  return 0;
1525  }
1526 
1527  delete f;
1528  return 1;
1529 }
1530 
1531 ////////////////////////////////////////////////////////////////////////////////
1532 /// Returns FALSE if one can access a file using the specified access mode.
1533 /// Mode is the same as for the Unix access(2) function.
1534 /// Attention, bizarre convention of return value!!
1535 
1537 {
1538  TWebFile *f = new TWebFile(path, "HEADONLY");
1539  if (f->fWritten == 0) {
1540  delete f;
1541  return kFALSE;
1542  }
1543  delete f;
1544  return kTRUE;
1545 }
1546 
1547 ////////////////////////////////////////////////////////////////////////////////
1548 /// Unlink, i.e. remove, a file or directory. Returns 0 when successful,
1549 /// -1 in case of failure. Not supported for httpd.
1550 
1552 {
1553  return -1;
1554 }
for(Int_t i=0;i< n;i++)
Definition: legend1.C:18
virtual Bool_t ReadBuffer10(char *buf, Int_t len)
Read specified byte range from remote file via HTTP 1.0 daemon (without mod-root installed).
Definition: TWebFile.cxx:460
Int_t fSocket
Definition: TSocket.h:88
virtual Bool_t IsValid() const
Definition: TSocket.h:151
static Long64_t GetMaxFullCacheSize()
Static method returning maxmimal size of full cache, which can be preserved by file instance...
Definition: TWebFile.cxx:1426
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:854
long long Long64_t
Definition: RtypesCore.h:69
static std::atomic< Long64_t > fgBytesRead
Number of bytes read by all TFile objects.
Definition: TFile.h:122
TLine * line
virtual TString BasicAuthentication()
Return basic authentication scheme, to be added to the request.
Definition: TWebFile.cxx:1371
void ws()
Definition: ws.C:62
const char Option_t
Definition: RtypesCore.h:62
virtual void SetOffset(Long64_t offset, ERelativeTo pos=kBeg)
Set position from where to start reading.
Definition: TFile.cxx:2129
This class represents a WWW compatible URL.
Definition: TUrl.h:35
Int_t fUid
Definition: TSystem.h:129
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:638
const char * GetProtocol() const
Definition: TUrl.h:67
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
Long64_t fFullCacheSize
complete content of the file, some http server may return complete content
Definition: TWebFile.h:55
void SetUrl(const char *url, Bool_t defaultIsFile=kFALSE)
Parse url character string and split in its different subcomponents.
Definition: TUrl.cxx:110
Int_t MakeDirectory(const char *name)
Make a directory via httpd. Not supported.
Definition: TWebFile.cxx:1455
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
virtual Int_t GetHunk(TSocket *s, char *hunk, Int_t maxsize)
Read a hunk of data from the socket, up until a terminator.
Definition: TWebFile.cxx:1258
static void SetFileReadCalls(Int_t readcalls=0)
Definition: TFile.cxx:4458
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1112
virtual void SetMsgReadBuffer10(const char *redirectLocation=0, Bool_t tempRedirect=kFALSE)
Set GET command for use by ReadBuffer(s)10(), handle redirection if needed.
Definition: TWebFile.cxx:266
#define gROOT
Definition: TROOT.h:402
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:585
virtual Int_t GetFromCache(char *buf, Int_t len, Int_t nseg, Long64_t *seg_pos, Int_t *seg_len)
Extract requested segments from the cached content.
Definition: TWebFile.cxx:582
static TUrl fgProxy
size of the cached content
Definition: TWebFile.h:57
Basic string class.
Definition: TString.h:125
TUrl fUrlOrg
Definition: TWebFile.h:52
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
virtual Int_t GetFromWeb10(char *buf, Int_t len, const TString &msg, Int_t nseg=0, Long64_t *seg_pos=0, Int_t *seg_len=0)
Read multiple byte range request from web server.
Definition: TWebFile.cxx:674
TString fMsgReadBuffer
Definition: TWebFile.h:48
const char * GetOptions() const
Definition: TUrl.h:74
Long_t fMtime
Definition: TSystem.h:132
ERelativeTo
Definition: TFile.h:171
Int_t fReadCalls
Number of read calls ( not counting the cache calls )
Definition: TFile.h:82
TString fUrl
Definition: TUrl.h:38
#define malloc
Definition: civetweb.c:818
Long64_t fSize
Definition: TSystem.h:131
static void SetProxy(const char *url)
Static method setting global proxy URL.
Definition: TWebFile.cxx:1390
Bool_t IsValid() const
Definition: TUrl.h:82
virtual void ProcessHttpHeader(const TString &headerLine)
Process the HTTP header in the argument.
Definition: TWebFile.cxx:1418
const char * GetUrl(Bool_t withDeflt=kFALSE) const
Return full URL.
Definition: TUrl.cxx:387
static void SetMaxFullCacheSize(Long64_t sz)
Static method, set maxmimal size of full cache,.
Definition: TWebFile.cxx:1435
virtual Int_t GetLine(TSocket *s, char *line, Int_t maxsize)
Read a line from the socket.
Definition: TWebFile.cxx:1195
static Int_t GetErrno()
Static function returning system error number.
Definition: TSystem.cxx:268
Int_t fMode
Definition: TSystem.h:128
TUrl fProxy
Definition: TWebFile.h:44
const char * GetFile() const
Definition: TUrl.h:72
static const char * GetProxy()
Static method returning the global proxy URL.
Definition: TWebFile.cxx:1406
static TString Encode(const char *data)
Transform data into a null terminated base64 string.
Definition: TBase64.cxx:115
virtual Int_t SendRaw(const void *buffer, Int_t length, ESendRecvOptions opt=kDefault)
Send a raw buffer of specified length.
Definition: TSocket.cxx:625
const char * GetHost() const
Definition: TUrl.h:70
virtual const char * HttpTerminator(const char *start, const char *peeked, Int_t peeklen)
Determine whether [START, PEEKED + PEEKLEN) contains an HTTP new line [\r]\n.
Definition: TWebFile.cxx:1338
TUrl fUrl
!URL of file
Definition: TFile.h:103
Int_t fD
File descriptor.
Definition: TFile.h:75
virtual void Sleep(UInt_t milliSec)
Sleep milliSec milli seconds.
Definition: TSystem.cxx:445
Bool_t fHasModRoot
Definition: TWebFile.h:45
Bool_t fNoProxy
Definition: TWebFile.h:47
virtual Bool_t IsOpen() const
A TWebFile that has been correctly constructed is always considered open.
Definition: TWebFile.cxx:378
static Long64_t GetFileBytesRead()
Static function returning the total number of bytes read from all files.
Definition: TFile.cxx:4418
virtual const char * Getenv(const char *env)
Get environment variable.
Definition: TSystem.cxx:1638
const char * GetDirEntry(void *dirp)
Get directory entry via httpd. Returns 0 in case no more entries.
Definition: TWebFile.cxx:1492
virtual Bool_t ReadBuffers(char *buf, Long64_t *pos, Int_t *len, Int_t nbuf)
Read specified byte ranges from remote file via HTTP daemon.
Definition: TWebFile.cxx:499
Long64_t Atoll() const
Return long long value of string.
Definition: TString.cxx:2001
static const char * gUserAgent
Definition: TWebFile.cxx:46
void Error(const char *location, const char *msgfmt,...)
Int_t fGid
Definition: TSystem.h:130
R__ALWAYS_INLINE Bool_t IsZombie() const
Definition: TObject.h:134
const char * GetUser() const
Definition: TUrl.h:68
TString fBasicUrlOrg
Definition: TWebFile.h:53
const char * GetPasswd() const
Definition: TUrl.h:69
void * fFullCache
Definition: TWebFile.h:54
virtual Bool_t ReadBuffer(char *buf, Int_t len)
Read specified byte range from remote file via HTTP daemon.
Definition: TWebFile.cxx:411
ROOT::R::TRInterface & r
Definition: Object.C:4
Bool_t fIsLink
Definition: TSystem.h:133
Long64_t fEND
Last used byte in file.
Definition: TFile.h:72
R__EXTERN TSystem * gSystem
Definition: TSystem.h:540
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:561
TString fBasicUrl
Definition: TWebFile.h:51
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
Ssiz_t Length() const
Definition: TString.h:386
Definition: TSocket.h:52
Int_t Unlink(const char *path)
Unlink, i.e.
Definition: TWebFile.cxx:1551
TWebFile()
Definition: TWebFile.h:39
static std::atomic< Int_t > fgReadCalls
Number of bytes read from all TFile objects.
Definition: TFile.h:124
static Int_t GetFileReadCalls()
Static function returning the total number of read calls from all files.
Definition: TFile.cxx:4435
virtual void Init(Bool_t create)
Initialize a TFile object.
Definition: TFile.cxx:596
Int_t ReadBufferViaCache(char *buf, Int_t len)
Read buffer via cache.
Definition: TFile.cxx:1766
virtual void Seek(Long64_t offset, ERelativeTo pos=kBeg)
Set position from where to start reading.
Definition: TWebFile.cxx:972
#define gPerfStats
const Bool_t kFALSE
Definition: RtypesCore.h:88
void FreeDirectory(void *dirp)
Free directory via httpd.
Definition: TWebFile.cxx:1479
virtual void CheckProxy()
Check if shell var "http_proxy" has been set and should be used.
Definition: TWebFile.cxx:351
static Long64_t fgMaxFullCacheSize
Definition: TWebFile.h:58
#define ClassImp(name)
Definition: Rtypes.h:359
virtual ~TWebFile()
Cleanup.
Definition: TWebFile.cxx:197
double Double_t
Definition: RtypesCore.h:55
#define free
Definition: civetweb.c:821
The TTimeStamp encapsulates seconds and ns since EPOCH.
Definition: TTimeStamp.h:71
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:570
static constexpr double s
EAccessMode
Definition: TSystem.h:44
Int_t GetPort() const
Definition: TUrl.h:81
Bool_t IsNull() const
Definition: TString.h:383
TString fMsgReadBuffer10
Definition: TWebFile.h:49
Bool_t fIsRootFile
!True is this is a ROOT file, raw file otherwise
Definition: TFile.h:97
TString fMsgGetHead
Definition: TWebFile.h:50
void * OpenDirectory(const char *name)
Open a directory via httpd.
Definition: TWebFile.cxx:1464
Int_t GetPathInfo(const char *path, FileStat_t &buf)
Get info about a file.
Definition: TWebFile.cxx:1508
Long64_t fSize
Definition: TWebFile.h:42
friend class TWebSocket
Definition: TWebFile.h:35
void MakeZombie()
Definition: TObject.h:49
const Int_t kMaxInt
Definition: RtypesCore.h:99
Long_t fIno
Definition: TSystem.h:127
virtual Int_t GetHead()
Get the HTTP header.
Definition: TWebFile.cxx:1028
virtual void Init(Bool_t readHeadOnly)
Initialize a TWebFile object.
Definition: TWebFile.cxx:210
R__EXTERN Int_t gDebug
Definition: Rtypes.h:86
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1975
Bool_t fHTTP11
Definition: TWebFile.h:46
Long64_t fArchiveOffset
!Offset at which file starts in archive
Definition: TFile.h:94
#define gDirectory
Definition: TDirectory.h:213
TWebSystem()
Create helper class that allows directory access via httpd.
Definition: TWebFile.cxx:1445
Definition: first.py:1
Int_t fWritten
Number of objects written so far.
Definition: TFile.h:80
Long64_t fOffset
!Seek offset cache
Definition: TFile.h:89
Long_t fDev
Definition: TSystem.h:126
static void SetFileBytesRead(Long64_t bytes=0)
Definition: TFile.cxx:4452
Abstract base class defining a generic interface to the underlying Operating System.
Definition: TSystem.h:248
virtual Int_t RecvRaw(void *buffer, Int_t length, ESendRecvOptions opt=kDefault)
Receive a raw buffer of specified length bytes.
Definition: TSocket.cxx:902
virtual Long64_t GetSize() const
Return maximum file size.
Definition: TWebFile.cxx:993
const Bool_t kTRUE
Definition: RtypesCore.h:87
void * fDirp
Definition: TWebFile.h:101
const Int_t n
Definition: legend1.C:16
virtual Bool_t ReadBuffers10(char *buf, Long64_t *pos, Int_t *len, Int_t nbuf)
Read specified byte ranges from remote file via HTTP 1.0 daemon (without mod-root installed)...
Definition: TWebFile.cxx:548
Long64_t fBytesRead
Number of bytes read from this file.
Definition: TFile.h:69
TSocket * fSocket
Definition: TWebFile.h:43
virtual Int_t GetFromWeb(char *buf, Int_t len, const TString &msg)
Read request from web server.
Definition: TWebFile.cxx:607
const char * cnt
Definition: TXMLSetup.cxx:74
Bool_t AccessPathName(const char *path, EAccessMode mode)
Returns FALSE if one can access a file using the specified access mode.
Definition: TWebFile.cxx:1536
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:866
virtual Int_t ReOpen(Option_t *mode)
Reopen a file with a different access mode, like from READ to UPDATE or from NEW, CREATE...
Definition: TWebFile.cxx:392
const char * Data() const
Definition: TString.h:345