Logo ROOT  
Reference Guide
TUri.cxx
Go to the documentation of this file.
1// @(#)root/base:$Id$
2// Author: Gerhard E. Bruckner 15/07/07
3
4/*************************************************************************
5 * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TUri
13\ingroup Base
14
15This class represents a RFC 3986 compatible URI.
16See http://rfc.net/rfc3986.html.
17It provides member functions to set and return the
18different parts of a URI. The functionality is that of
19a validating parser.
20*/
21
22#include <ctype.h> // for tolower()
23#include "TUri.h"
24#include "TObjArray.h"
25#include "TObjString.h"
26#include "TPRegexp.h"
27
28//RFC3986:
29// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
30const char* const kURI_pchar = "(?:[[:alpha:][:digit:]-._~!$&'()*+,;=:@]|%[0-9A-Fa-f][0-9A-Fa-f])";
31
32//unreserved characters, see chapter 2.3
33const char* const kURI_unreserved = "[[:alpha:][:digit:]-._~]";
34
35// reserved characters, see chapter 2.2
36// reserved = gen-delims / sub-delims
37//const char* const kURI_reserved = "[:/?#[]@!$&'()*+,;=]";
38
39// gen-delims, see chapter 2.2
40// delimiters of the generic URI components
41//const char* const kURI_gendelims = "[:/?#[]@]";
42
43// sub-delims, see chapter 2.2
44//const char* const kURI_subdelims = "[!$&'()*+,;=]";
45
46
48
49////////////////////////////////////////////////////////////////////////////////
50/// Constructor that calls SetUri with a complete URI.
51
53{
54 SetUri(uri);
55}
56
57////////////////////////////////////////////////////////////////////////////////
58/// Constructor that calls SetUri with a complete URI.
59
60TUri::TUri(const char *uri)
61{
62 SetUri(uri);
63}
64
65////////////////////////////////////////////////////////////////////////////////
66/// TUri copy ctor.
67
68TUri::TUri(const TUri &uri) : TObject(uri)
69{
70 fScheme = uri.fScheme;
71 fUserinfo = uri.fUserinfo;
72 fHost = uri.fHost;
73 fPort = uri.fPort;
74 fPath = uri.fPath;
75 fQuery = uri.fQuery;
76 fFragment = uri.fFragment;
79 fHasHost = uri.fHasHost;
80 fHasPort = uri.fHasPort;
81 fHasPath = uri.fHasPath;
82 fHasQuery = uri.fHasQuery;
84}
85
86////////////////////////////////////////////////////////////////////////////////
87/// TUri assignment operator.
88
90{
91 if (this != &rhs) {
93 fScheme = rhs.fScheme;
94 fUserinfo = rhs.fUserinfo;
95 fHost = rhs.fHost;
96 fPort = rhs.fPort;
97 fPath = rhs.fPath;
98 fQuery = rhs.fQuery;
99 fFragment = rhs.fFragment;
102 fHasHost = rhs.fHasHost;
103 fHasPort = rhs.fHasPort;
104 fHasPath = rhs.fHasPath;
105 fHasQuery = rhs.fHasQuery;
107 }
108 return *this;
109}
110
111////////////////////////////////////////////////////////////////////////////////
112/// Implementation of a TUri Equivalence operator
113/// that uses syntax-based normalisation
114/// see chapter 6.2.2.
115
116Bool_t operator== (const TUri &u1, const TUri &u2)
117{
118 // make temporary copies of the operands
119 TUri u11 = u1;
120 TUri u22 = u2;
121 // normalise them
122 u11.Normalise();
123 u22.Normalise();
124 // compare them as TStrings
125 return u11.GetUri() == u22.GetUri();
126}
127
128////////////////////////////////////////////////////////////////////////////////
129/// Returns the whole URI -
130/// an implementation of chapter 5.3 component recomposition.
131/// The result URI is composed out of the five basic parts.
132/// ~~~ {.cpp}
133/// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
134/// hier-part = "//" authority path-abempty
135/// / path-absolute
136/// / path-rootless
137/// / path-empty
138/// ~~~
139
141{
142 TString result = "";
143 if (fHasScheme)
144 result = fScheme + ":";
145 result += GetHierPart();
146 if (fHasQuery)
147 result += TString("?") + fQuery;
148 if (fHasFragment)
149 result += TString("#") + fFragment;
150 return result;
151}
152
153////////////////////////////////////////////////////////////////////////////////
154/// This function implements the "remove_dot_segments" routine
155/// of chapter 5.2.4 "for interpreting and removing the
156/// special '.' and '..' complete path segments from a
157/// referenced path".
158
160{
161 TString source = inp;
162 TString sink = TString(""); // sink buffer
163
164 // Step 2 "While the source buffer is not empty, loop as follows:"
165 while (source.Length() > 0) {
166 // Rule 2.A
167 if (TPRegexp("^\\.\\.?/(.*)$").Substitute(source, "/$1") > 0)
168 continue;
169
170 // Rule 2.B
171 if (TPRegexp("^/\\./(.*)$|^/\\.($)").Substitute(source, "/$1") > 0)
172 continue;
173
174 // Rule 2.C
175 if (TPRegexp("^/\\.\\./(.*)$|^/\\.\\.($)").Substitute(source, "/$1") > 0) {
176 Ssiz_t last = sink.Last('/');
177 if (last == -1)
178 last = 0;
179 sink.Remove(last, sink.Length() - last);
180 continue;
181 }
182
183 // Rule 2.D
184 if (source.CompareTo(".") == 0 || source.CompareTo("..") == 0) {
185 source.Remove(0, source.Length() - 11);
186 continue;
187 }
188
189 // Rule 2.E
190 TPRegexp regexp = TPRegexp("^(/?[^/]*)(?:/|$)");
191 TObjArray *tokens = regexp.MatchS(source);
192 TString segment = ((TObjString*) tokens->At(1))->GetString();
193 sink += segment;
194 source.Remove(0, segment.Length());
195 delete tokens;
196 }
197
198 // Step 3: return sink buffer
199 return sink;
200}
201
202////////////////////////////////////////////////////////////////////////////////
203/// Returns kTRUE if instance qualifies as absolute-URI
204/// absolute-URI = scheme ":" hier-part [ "?" query ]
205/// cf. Appendix A.
206
208{
209 return (HasScheme() && HasHierPart() && !HasFragment());
210}
211
212////////////////////////////////////////////////////////////////////////////////
213/// Returns kTRUE if instance qualifies as relative-ref
214/// relative-ref = relative-part [ "?" query ] [ "#" fragment ]
215/// cf. Appendix A.
216
218{
219 return (!HasScheme() && HasRelativePart());
220}
221
222////////////////////////////////////////////////////////////////////////////////
223/// Returns kTRUE if instance qualifies as URI
224/// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
225/// cf. Appendix A.
226
228{
229 return (HasScheme() && HasHierPart());
230}
231
232////////////////////////////////////////////////////////////////////////////////
233/// Returns kTRUE if instance qualifies as URI-reference
234/// URI-reference = URI / relative-ref
235/// cf. Appendix A.
236
238{
239 return (IsUri() || IsRelative());
240}
241
242////////////////////////////////////////////////////////////////////////////////
243/// Set scheme component of URI:
244/// ~~~ {.cpp}
245/// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
246/// ~~~
247
249{
250 if (!scheme) {
252 return kTRUE;
253 }
254 if (IsScheme(scheme)) {
255 fScheme = scheme;
257 return kTRUE;
258 } else {
259 Error("SetScheme", "<scheme> component \"%s\" of URI is not compliant with RFC 3986.", scheme.Data());
260 return kFALSE;
261 }
262}
263
264////////////////////////////////////////////////////////////////////////////////
265/// Returns kTRUE if string qualifies as URI scheme:
266/// ~~~ {.cpp}
267/// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
268/// ~~~
269
271{
272 return TPRegexp(
273 "^[[:alpha:]][[:alpha:][:digit:]+-.]*$"
274 ).Match(string);
275}
276
277////////////////////////////////////////////////////////////////////////////////
278/// Returns the authority part of the instance:
279/// ~~~ {.cpp}
280/// authority = [ userinfo "@" ] host [ ":" port ]
281/// ~~~
282
284{
285 TString authority = fHasUserinfo ? fUserinfo + "@" + fHost : fHost;
286 if (fHasPort && !fPort.IsNull())
287 // add port only if not empty
288 authority += TString(":") + TString(fPort);
289 return (authority);
290}
291
292////////////////////////////////////////////////////////////////////////////////
293/// Set query component of URI:
294/// ~~~ {.cpp}
295/// query = *( pchar / "/" / "?" )
296/// ~~~
297
299{
300 if (!query) {
302 return kTRUE;
303 }
304 if (IsQuery(query)) {
305 fQuery = query;
307 return kTRUE;
308 } else {
309 Error("SetQuery", "<query> component \"%s\" of URI is not compliant with RFC 3986.", query.Data());
310 return kFALSE;
311 }
312}
313
314////////////////////////////////////////////////////////////////////////////////
315/// Returns kTRUE if string qualifies as URI query:
316/// ~~~ {.cpp}
317/// query = *( pchar / "/" / "?" )
318/// ~~~
319
321{
322 return TPRegexp(
323 TString("^([/?]|") + kURI_pchar + ")*$"
324 ).Match(string);
325}
326
327////////////////////////////////////////////////////////////////////////////////
328/// Set authority part of URI:
329/// ~~~ {.cpp}
330/// authority = [ userinfo "@" ] host [ ":" port ]
331/// ~~~
332///
333/// Split into components {userinfo@, host, :port},
334/// remember that according to the RFC, it is necessary to
335/// distinguish between missing component (no delimiter)
336/// and empty component (delimiter present).
337
339{
340 if (authority.IsNull()) {
344 return kTRUE;
345 }
346 TPRegexp regexp = TPRegexp("^(?:(.*@))?([^:]*)((?::.*)?)$");
347 TObjArray *tokens = regexp.MatchS(authority);
348
349 if (tokens->GetEntries() != 4) {
350 Error("SetAuthority", "<authority> component \"%s\" of URI is not compliant with RFC 3986.", authority.Data());
351 return kFALSE;
352 }
353
354 Bool_t valid = kTRUE;
355
356 // handle userinfo
357 TString userinfo = ((TObjString*) tokens->At(1))->GetString();
358 if (userinfo.EndsWith("@")) {
359 userinfo.Remove(TString::kTrailing, '@');
360 valid &= SetUserInfo(userinfo);
361 }
362
363 // handle host
364 TString host = ((TObjString*) tokens->At(2))->GetString();
365 valid &= SetHost(host);
366
367 // handle port
368 TString port = ((TObjString*) tokens->At(3))->GetString();
369 if (port.BeginsWith(":")) {
370 port.Remove(TString::kLeading, ':');
371 valid &= SetPort(port);
372 }
373
374 return valid;
375}
376
377////////////////////////////////////////////////////////////////////////////////
378/// Returns kTRUE if string qualifies as valid URI authority:
379/// ~~~ {.cpp}
380/// authority = [ userinfo "@" ] host [ ":" port ]
381/// ~~~
382
384{
385 // split into parts {userinfo, host, port}
386 TPRegexp regexp = TPRegexp("^(?:(.*)@)?([^:]*)(?::(.*))?$");
387 TObjArray *tokens = regexp.MatchS(string);
388 TString userinfo = ((TObjString*) tokens->At(1))->GetString();
389 TString host = ((TObjString*) tokens->At(2))->GetString();
390 TString port;
391 // port is optional
392 if (tokens->GetEntries() == 4)
393 port = ((TObjString*) tokens->At(3))->GetString();
394 else
395 port = "";
396 return (IsHost(host) && IsUserInfo(userinfo) && IsPort(port));
397}
398
399////////////////////////////////////////////////////////////////////////////////
400/// Set userinfo component of URI:
401/// ~~~ {.cpp}
402/// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
403/// ~~~
404
406{
407 if (userinfo.IsNull()) {
409 return kTRUE;
410 }
411 if (IsUserInfo(userinfo)) {
412 fUserinfo = userinfo;
414 return kTRUE;
415 } else {
416 Error("SetUserInfo", "<userinfo> component \"%s\" of URI is not compliant with RFC 3986.", userinfo.Data());
417 return kFALSE;
418 }
419}
420
421////////////////////////////////////////////////////////////////////////////////
422/// Returns kTRUE if string qualifies as valid URI userinfo:
423/// ~~~ {.cpp}
424/// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
425/// ~~~
426/// this equals to pchar without the '@' character
427
429{
430 return (TPRegexp(
431 "^" + TString(kURI_pchar) + "*$"
432 ).Match(string) > 0 && !TString(string).Contains("@"));
433}
434
435////////////////////////////////////////////////////////////////////////////////
436/// Set host component of URI:
437/// ~~~ {.cpp}
438/// RFC 3986: host = IP-literal / IPv4address / reg-name
439/// implemented: host = IPv4address / reg-name
440/// ~~~
441
443{
444 if (IsHost(host)) {
445 fHost = host;
446 fHasHost = kTRUE;
447 return kTRUE;
448 } else {
449 Error("SetHost", "<host> component \"%s\" of URI is not compliant with RFC 3986.", host.Data());
450 return kFALSE;
451 }
452}
453
454////////////////////////////////////////////////////////////////////////////////
455/// Set port component of URI:
456/// ~~~ {.cpp}
457/// port = *DIGIT
458/// ~~~
459
461{
462 if (IsPort(port)) {
463 fPort = port;
464 fHasPort = kTRUE;
465 return kTRUE;
466 }
467 Error("SetPort", "<port> component \"%s\" of URI is not compliant with RFC 3986.", port.Data());
468 return kFALSE;
469}
470
471////////////////////////////////////////////////////////////////////////////////
472/// Set path component of URI:
473/// ~~~ {.cpp}
474/// path = path-abempty ; begins with "/" or is empty
475/// / path-absolute ; begins with "/" but not "//"
476/// / path-noscheme ; begins with a non-colon segment
477/// / path-rootless ; begins with a segment
478/// / path-empty ; zero characters
479/// ~~~
480
482{
483 if (IsPath(path)) {
484 fPath = path;
485 fHasPath = kTRUE;
486 return kTRUE;
487 }
488 Error("SetPath", "<path> component \"%s\" of URI is not compliant with RFC 3986.", path.Data());
489 return kFALSE;
490}
491
492////////////////////////////////////////////////////////////////////////////////
493/// Set fragment component of URI:
494/// ~~~ {.cpp}
495/// fragment = *( pchar / "/" / "?" )
496/// ~~~
497
499{
500 if (IsFragment(fragment)) {
501 fFragment = fragment;
503 return kTRUE;
504 } else {
505 Error("SetFragment", "<fragment> component \"%s\" of URI is not compliant with RFC 3986.", fragment.Data());
506 return kFALSE;
507 }
508}
509
510////////////////////////////////////////////////////////////////////////////////
511/// Returns kTRUE if string qualifies as valid fragment component
512/// ~~~ {.cpp}
513/// fragment = *( pchar / "/" / "?" )
514/// ~~~
515
517{
518 return (TPRegexp(
519 "^(" + TString(kURI_pchar) + "|[/?])*$"
520 ).Match(string) > 0);
521}
522
523////////////////////////////////////////////////////////////////////////////////
524/// Display function,
525/// - option "d" .. debug output
526/// - anything else .. simply print URI.
527
528void TUri::Print(Option_t *option) const
529{
530 if (strcmp(option, "d") != 0) {
531 Printf("%s", GetUri().Data());
532 return ;
533 }
534 // debug output
535 Printf("URI: <%s>", GetUri().Data());
536 Printf("(%c) |--scheme---------<%s>", fHasScheme ? 't' : 'f', fScheme.Data());
537 Printf(" |--hier-----------<%s>", GetHierPart().Data());
538 Printf("(%c) |--authority------<%s>", HasAuthority() ? 't' : 'f', GetAuthority().Data());
539 Printf("(%c) |--userinfo---<%s>", fHasUserinfo ? 't' : 'f', fUserinfo.Data());
540 Printf("(%c) |--host-------<%s>", fHasHost ? 't' : 'f', fHost.Data());
541 Printf("(%c) |--port-------<%s>", fHasPort ? 't' : 'f', fPort.Data());
542 Printf("(%c) |--path-------<%s>", fHasPath ? 't' : 'f', fPath.Data());
543 Printf("(%c) |--query------<%s>", fHasQuery ? 't' : 'f', fQuery.Data());
544 Printf("(%c) |--fragment---<%s>", fHasFragment ? 't' : 'f', fFragment.Data());
545 printf("path flags: ");
546 if (IsPathAbempty(fPath))
547 printf("abempty ");
549 printf("absolute ");
551 printf("rootless ");
552 if (IsPathEmpty(fPath))
553 printf("empty ");
554 printf("\nURI flags: ");
555 if (IsAbsolute())
556 printf("absolute-URI ");
557 if (IsRelative())
558 printf("relative-ref ");
559 if (IsUri())
560 printf("URI ");
561 if (IsReference())
562 printf("URI-reference ");
563 printf("\n");
564}
565
566////////////////////////////////////////////////////////////////////////////////
567/// Initialize this URI object.
568/// Set all TString members to empty string,
569/// set all Bool_t members to kFALSE.
570
572{
573 fScheme = "";
574 fUserinfo = "";
575 fHost = "";
576 fPort = "";
577 fPath = "";
578 fQuery = "";
579 fFragment = "";
580
588}
589
590////////////////////////////////////////////////////////////////////////////////
591/// Parse URI and set the member variables accordingly,
592/// returns kTRUE if URI validates, and kFALSE otherwise:
593/// ~~~ {.cpp}
594/// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
595/// hier-part = "//" authority path-abempty
596/// / path-absolute
597/// / path-rootless
598/// / path-empty
599/// ~~~
600
602{
603 // Reset member variables
604 Reset();
605
606 // regular expression taken from appendix B
607 // reference points 12 3 4 5 6 7 8 9
608 TPRegexp regexp = TPRegexp("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)([?]([^#]*))?(#(.*))?");
609 TObjArray *tokens = regexp.MatchS(uri);
610
611 // collect bool values to see if all setters succeed
612 Bool_t valid = kTRUE;
613 //tokens->Print();
614 switch (tokens->GetEntries()) {
615 case 10:
616 // URI contains fragment delimiter '#'
617 valid &= SetFragment(((TObjString*) tokens->At(9))->GetString());
618 // fallthrough
619
620 case 8:
621 // URI does not contain a fragment delimiter
622 // if there is a query delimiter '?', set query
623 if (!((TString)((TObjString*) tokens->At(6))->GetString()).IsNull())
624 valid &= SetQuery(((TObjString*) tokens->At(7))->GetString());
625 // fallthrough
626
627 case 6:
628 // URI does not contain fragment or query delimiters
629 valid &= SetPath(((TObjString*) tokens->At(5))->GetString());
630 // if there is an authority delimiter '//', set authority
631 if (!((TString)((TObjString*) tokens->At(3))->GetString()).IsNull())
632 valid &= SetAuthority(((TObjString*) tokens->At(4))->GetString());
633 // if there is a scheme delimiter ':', set scheme
634 if (!((TString)((TObjString*) tokens->At(1))->GetString()).IsNull())
635 valid &= SetScheme(((TObjString*) tokens->At(2))->GetString());
636 break;
637
638 default:
639 // regular expression did not match
640 Error("SetUri", "URI \"%s\" is not is not compliant with RFC 3986.", uri.Data());
641 valid = kFALSE;
642 }
643
644 // reset member variables once again, if one at least setter failed
645 if (!valid)
646 Reset();
647
648 delete tokens;
649 return valid;
650}
651
652////////////////////////////////////////////////////////////////////////////////
653/// ~~~ {.cpp}
654/// hier-part = "//" authority path-abempty
655/// / path-absolute
656/// / path-rootless
657/// / path-empty
658/// ~~~
659
661{
663 return (TString("//") + GetAuthority() + fPath);
664 else
665 return fPath;
666}
667
668////////////////////////////////////////////////////////////////////////////////
669/// ~~~ {.cpp}
670/// relative-part = "//" authority path-abempty
671/// / path-absolute
672/// / path-noscheme
673/// / path-empty
674/// ~~~
675
677{
679 return (TString("//") + GetAuthority() + fPath);
680 else
681 return fPath;
682}
683
684////////////////////////////////////////////////////////////////////////////////
685/// Set hier-part component of URI:
686/// ~~~ {.cpp}
687/// hier-part = "//" authority path-abempty
688/// / path-absolute
689/// / path-rootless
690/// / path-empty
691/// ~~~
692
694{
695 /* if ( IsPathAbsolute(hier) || IsPathRootless(hier) || IsPathEmpty(hier) ) {
696 SetPath (hier);
697 return kTRUE;
698 }
699 */
700
701 // reference points: 1 2 3
702 TPRegexp regexp = TPRegexp("^(//([^/?#]*))?([^?#]*)$");
703 TObjArray *tokens = regexp.MatchS(hier);
704
705 if (tokens->GetEntries() == 0) {
706 Error("SetHierPart", "<hier-part> component \"%s\" of URI is not compliant with RFC 3986.", hier.Data());
707 delete tokens;
708 return false;
709 }
710
711 TString delm = ((TObjString*) tokens->At(1))->GetString();
712 TString auth = ((TObjString*) tokens->At(2))->GetString();
713 TString path = ((TObjString*) tokens->At(3))->GetString();
714
715 Bool_t valid = kTRUE;
716
717 if (!delm.IsNull() && IsPathAbempty(path)) {
718 // URI contains an authority delimiter '//' ...
719 valid &= SetAuthority(auth);
720 valid &= SetPath(path);
721 } else {
722 // URI does not contain an authority
723 if (IsPathAbsolute(path) || IsPathRootless(path) || IsPathEmpty(path))
724 valid &= SetPath(path);
725 else {
726 valid = kFALSE;
727 Error("SetHierPart", "<hier-part> component \"%s\" of URI is not compliant with RFC 3986.", hier.Data());
728 }
729 }
730 delete tokens;
731 return valid;
732}
733
734////////////////////////////////////////////////////////////////////////////////
735/// Returns kTRUE if string qualifies as hier-part:
736/// ~~~ {.cpp}
737/// hier-part = "//" authority path-abempty
738/// / path-absolute
739/// / path-rootless
740/// / path-empty
741/// ~~~
742
744{
745 // use functionality of SetHierPart
746 // in order to avoid duplicate code
747 TUri uri;
748 return (uri.SetHierPart(string));
749}
750
751////////////////////////////////////////////////////////////////////////////////
752/// Returns kTRUE if string qualifies as relative-part:
753/// ~~~ {.cpp}
754/// relative-part = "//" authority path-abempty
755/// / path-absolute
756/// / path-noscheme
757/// / path-empty
758/// ~~~
759
761{
762 // use functionality of SetRelativePart
763 // in order to avoid duplicate code
764 TUri uri;
765 return (uri.SetRelativePart(string));
766}
767
768////////////////////////////////////////////////////////////////////////////////
769/// Set relative-part component of URI:
770/// ~~~ {.cpp}
771/// relative-part = "//" authority path-abempty
772/// / path-absolute
773/// / path-noscheme
774/// / path-empty
775/// ~~~
776
778{
779 // reference points: 1 2 3
780 TPRegexp regexp = TPRegexp("^(//([^/?#]*))?([^?#]*)$");
781 TObjArray *tokens = regexp.MatchS(relative);
782
783 if (tokens->GetEntries() == 0) {
784 Error("SetRelativePath", "<relative-part> component \"%s\" of URI is not compliant with RFC 3986.", relative.Data());
785 delete tokens;
786 return false;
787 }
788 TString delm = ((TObjString*) tokens->At(1))->GetString();
789 TString auth = ((TObjString*) tokens->At(2))->GetString();
790 TString path = ((TObjString*) tokens->At(3))->GetString();
791
792 Bool_t valid = kTRUE;
793
794 if (!delm.IsNull() && IsPathAbempty(path)) {
795 // URI contains an authority delimiter '//' ...
796 valid &= SetAuthority(auth);
797 valid &= SetPath(path);
798 } else {
799 // URI does not contain an authority
800 if (IsPathAbsolute(path) || IsPathNoscheme(path) || IsPathEmpty(path))
801 valid &= SetPath(path);
802 else {
803 valid = kFALSE;
804 Error("SetRelativePath", "<relative-part> component \"%s\" of URI is not compliant with RFC 3986.", relative.Data());
805 }
806 }
807 delete tokens;
808 return valid;
809}
810
811////////////////////////////////////////////////////////////////////////////////
812/// Percent-encode and return the given string according to RFC 3986
813/// in principle, this function cannot fail or produce an error.
814
815const TString TUri::PctEncode(const TString &source)
816{
817 TString sink = "";
818 // iterate through source
819 for (Int_t i = 0; i < source.Length(); i++) {
820 if (IsUnreserved(TString(source(i)))) {
821 // unreserved character -> copy
822 sink = sink + source[i];
823 } else {
824 // reserved character -> encode to 2 digit hex
825 // preceded by '%'
826 char buffer[4];
827 sprintf(buffer, "%%%02X", source[i]);
828 sink = sink + buffer;
829 }
830 }
831 return sink;
832}
833
834////////////////////////////////////////////////////////////////////////////////
835/// Returns kTRUE if string qualifies as valid host component:
836/// host = IP-literal / IPv4address / reg-name
837/// implemented: host = IPv4address / reg-name
838
840{
841 return (IsRegName(string) || IsIpv4(string));
842}
843
844////////////////////////////////////////////////////////////////////////////////
845/// Returns kTRUE if string qualifies as valid path component:
846/// ~~~ {.cpp}
847/// path = path-abempty ; begins with "/" or is empty
848/// / path-absolute ; begins with "/" but not "//"
849/// / path-noscheme ; begins with a non-colon segment
850/// / path-rootless ; begins with a segment
851/// / path-empty ; zero characters
852/// ~~~
853
855{
856 return (IsPathAbempty(string) ||
857 IsPathAbsolute(string) ||
858 IsPathNoscheme(string) ||
859 IsPathRootless(string) ||
860 IsPathEmpty(string));
861}
862
863////////////////////////////////////////////////////////////////////////////////
864/// Returns kTRUE if string qualifies as valid path-abempty component:
865/// ~~~ {.cpp}
866/// path-abempty = *( "/" segment )
867/// segment = *pchar
868/// ~~~
869
871{
872 return (TPRegexp(
873 TString("^(/") + TString(kURI_pchar) + "*)*$"
874 ).Match(string) > 0);
875}
876
877////////////////////////////////////////////////////////////////////////////////
878/// Returns kTRUE if string qualifies as valid path-absolute component
879/// ~~~ {.cpp}
880/// path-absolute = "/" [ segment-nz *( "/" segment ) ]
881/// segment-nz = 1*pchar
882/// segment = *pchar
883/// ~~~
884
886{
887 return (TPRegexp(
888 TString("^/(") + TString(kURI_pchar) + "+(/" + TString(kURI_pchar) + "*)*)?$"
889 ).Match(string) > 0);
890}
891
892////////////////////////////////////////////////////////////////////////////////
893/// Returns kTRUE if string qualifies as valid path-noscheme component:
894/// ~~~ {.cpp}
895/// path-noscheme = segment-nz-nc *( "/" segment )
896/// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
897/// segment = *pchar
898/// ~~~
899
901{
902 return (TPRegexp(
903 TString("^(([[:alpha:][:digit:]-._~!$&'()*+,;=@]|%[0-9A-Fa-f][0-9A-Fa-f])+)(/") + TString(kURI_pchar) + "*)*$"
904 ).Match(string) > 0);
905}
906
907////////////////////////////////////////////////////////////////////////////////
908/// Returns kTRUE if string qualifies as valid path-rootless component:
909/// ~~~ {.cpp}
910/// path-rootless = segment-nz *( "/" segment )
911/// ~~~
912
914{
915 return TPRegexp(
916 TString("^") + TString(kURI_pchar) + "+(/" + TString(kURI_pchar) + "*)*$"
917 ).Match(string);
918}
919
920////////////////////////////////////////////////////////////////////////////////
921/// Returns kTRUE if string qualifies as valid path-empty component:
922/// ~~~ {.cpp}
923/// path-empty = 0<pchar>
924/// ~~~
925
927{
928 return TString(string).IsNull();
929}
930
931////////////////////////////////////////////////////////////////////////////////
932/// Returns kTRUE if string qualifies as valid port component:
933/// ~~~ {.cpp}
934/// RFC 3986: port = *DIGIT
935/// ~~~
936
938{
939 return (TPRegexp("^[[:digit:]]*$").Match(string) > 0);
940}
941
942////////////////////////////////////////////////////////////////////////////////
943/// Returns kTRUE if string qualifies as valid reg-name:
944/// ~~~ {.cpp}
945/// reg-name = *( unreserved / pct-encoded / sub-delims )
946/// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
947/// / "*" / "+" / "," / ";" / "="
948/// ~~~
949
951{
952 return (TPRegexp(
953 "^([[:alpha:][:digit:]-._~!$&'()*+,;=]|%[0-9A-Fa-f][0-9A-Fa-f])*$").Match(string) > 0);
954}
955
956////////////////////////////////////////////////////////////////////////////////
957/// Returns kTRUE, if string holds a valid IPv4 address
958/// currently only decimal variant supported.
959/// Existence of leading 0s or numeric range remains unchecked
960/// IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet.
961
963{
964 return (TPRegexp(
965 "^([[:digit:]]{1,3}[.]){3}[[:digit:]]{1,3}$").Match(string) > 0);
966}
967
968////////////////////////////////////////////////////////////////////////////////
969/// Returns kTRUE, if the given string does not contain
970/// RFC 3986 reserved characters
971/// ~~~ {.cpp}
972/// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
973/// ~~~
974
976{
977 return (TPRegexp(
978 "^" + TString(kURI_unreserved) + "*$").Match(string) > 0);
979}
980
981////////////////////////////////////////////////////////////////////////////////
982/// Syntax based normalisation according to
983/// RFC chapter 6.2.2.
984
986{
987 // case normalisation of host and scheme
988 // cf. chapter 6.2.2.1
990 if (fHasHost) {
991 TString host = GetHost();
992 host.ToLower();
993 SetHost(host);
994 }
995 // percent-encoding normalisation (6.2.2.2) for
996 // userinfo, host (reg-name), path, query, fragment
1002
1003 // path segment normalisation (6.2.2.3)
1004 if (fHasPath)
1006}
1007
1008////////////////////////////////////////////////////////////////////////////////
1009/// Percent-decode only the escapes that encode unreserved characters;
1010/// other escapes are kept with upper-cased hex digits. We assume a valid pct-encoded string.
1011
1013{
1014 TString sink = "";
1015 Int_t i = 0;
1016 while (i < source.Length()) {
1017 if (source[i] == '%') {
1018 if (source.Length() < i+2) {
1019 // abort if out of bounds
1020 return sink;
1021 }
1022 // two hex digits follow -> decode to ASCII
1023 // upper nibble, bits 4-7
1024 char c1 = tolower(source[i + 1]) - '0';
1025 if (c1 > 9) // a-f
1026 c1 -= 39;
1027 // lower nibble, bits 0-3
1028 char c0 = tolower(source[i + 2]) - '0';
1029 if (c0 > 9) // a-f
1030 c0 -= 39;
1031 char decoded = c1 << 4 | c0;
1032 if (TPRegexp(kURI_unreserved).Match(decoded) > 0) {
1033 // we have an unreserved character -> store decoded version
1034 sink = sink + decoded;
1035 } else {
1036 // this is a reserved character
1037 TString pct = source(i,3);
1038 pct.ToUpper();
1039 sink = sink + pct;
1040 }
1041 // advance 2 characters
1042 i += 2;
1043 } else {
1044 // regular character -> copy
1045 sink = sink + source[i];
1046 }
1047 i++;
1048 }
1049 return sink;
1050}
1051
1052////////////////////////////////////////////////////////////////////////////////
1053/// Normalise the percent-encoded parts of the string
1054/// i.e. uppercase the hexadecimal digits
1055/// %[:alpha:][:alpha:] -> %[:ALPHA:][:ALPHA:]
1056
1058{
1059 TString sink = "";
1060 Int_t i = 0;
1061 while (i < source.Length()) {
1062 if (source[i] == '%') {
1063 if (source.Length() < i+2) {
1064 // abort if out of bounds
1065 return sink;
1066 }
1067 TString pct = source(i,3);
1068 // uppercase the pct part
1069 pct.ToUpper();
1070 sink = sink + pct;
1071 // advance 2 characters
1072 i += 2;
1073 } else {
1074 // regular character -> copy
1075 sink = sink + source[i];
1076 }
1077 i++;
1078 }
1079 return sink;
1080}
1081
1082////////////////////////////////////////////////////////////////////////////////
1083/// Percent-decode the given string according to chapter 2.1
1084/// we assume a valid pct-encoded string.
1085
1086TString const TUri::PctDecode(const TString &source)
1087{
1088 TString sink = "";
1089 Int_t i = 0;
1090 while (i < source.Length()) {
1091 if (source[i] == '%') {
1092 if (source.Length() < i+2) {
1093 // abort if out of bounds
1094 return sink;
1095 }
1096 // two hex digits follow -> decode to ASCII
1097 // upper nibble, bits 4-7
1098 char c1 = tolower(source[i + 1]) - '0';
1099 if (c1 > 9) // a-f
1100 c1 -= 39;
1101 // lower nibble, bits 0-3
1102 char c0 = tolower(source[i + 2]) - '0';
1103 if (c0 > 9) // a-f
1104 c0 -= 39;
1105 sink = sink + (char)(c1 << 4 | c0);
1106 // advance 2 characters
1107 i += 2;
1108 } else {
1109 // regular character -> copy
1110 sink = sink + source[i];
1111 }
1112 i++;
1113 }
1114 return sink;
1115}
1116
1117////////////////////////////////////////////////////////////////////////////////
1118/// Transform a URI reference into its target URI using
1119/// given a base URI.
1120/// This is an implementation of the pseudocode in chapter 5.2.2.
1121
1122TUri TUri::Transform(const TUri &reference, const TUri &base)
1123{
1124 TUri target;
1125 if (reference.HasScheme()) {
1126 target.SetScheme(reference.GetScheme());
1127 if (reference.HasAuthority())
1128 target.SetAuthority(reference.GetAuthority());
1129 if (reference.HasPath())
1130 target.SetPath(RemoveDotSegments(reference.GetPath()));
1131 if (reference.HasQuery())
1132 target.SetQuery(reference.GetQuery());
1133 } else {
1134 if (reference.HasAuthority()) {
1135 target.SetAuthority(reference.GetAuthority());
1136 if (reference.HasPath())
1137 target.SetPath(RemoveDotSegments(reference.GetPath()));
1138 if (reference.HasQuery())
1139 target.SetQuery(reference.GetQuery());
1140 } else {
1141 if (reference.GetPath().IsNull()) {
1142 target.SetPath(base.GetPath());
1143 if (reference.HasQuery()) {
1144 target.SetQuery(reference.GetQuery());
1145 } else {
1146 if (base.HasQuery())
1147 target.SetQuery(base.GetQuery());
1148 }
1149 } else {
1150 if (reference.GetPath().BeginsWith("/")) {
1151 target.SetPath(RemoveDotSegments(reference.GetPath()));
1152 } else {
1153 target.SetPath(RemoveDotSegments(MergePaths(reference, base)));
1154 }
1155 if (reference.HasQuery())
1156 target.SetQuery(reference.GetQuery());
1157 }
1158 if (base.HasAuthority())
1159 target.SetAuthority(base.GetAuthority());
1160 }
1161 if (base.HasScheme())
1162 target.SetScheme(base.GetScheme());
1163 }
1164 if (reference.HasFragment())
1165 target.SetFragment(reference.GetFragment());
1166 return target;
1167}
1168
1169////////////////////////////////////////////////////////////////////////////////
1170/// RFC 3986, 5.3.2.
1171/// If the base URI has a defined authority component and an empty
1172/// path, then return a string consisting of "/" concatenated with the
1173/// reference's path; otherwise,
1174/// return a string consisting of the reference's path component
1175/// appended to all but the last segment of the base URI's path (i.e.,
1176/// excluding any characters after the right-most "/" in the base URI
1177/// path, or excluding the entire base URI path if it does not contain
1178/// any "/" characters).
1179
1180const TString TUri::MergePaths(const TUri &reference, const TUri &base)
1181{
1182 TString result = "";
1183 if (base.HasAuthority() && base.GetPath().IsNull()) {
1184 result = TString("/") + reference.GetPath();
1185 } else {
1186 TString basepath = base.GetPath();
1187 Ssiz_t last = basepath.Last('/');
1188 if (last == -1)
1189 result = reference.GetPath();
1190 else
1191 result = basepath(0, last + 1) + reference.GetPath();
1192 }
1193 return result;
1194}
int Int_t
Definition: RtypesCore.h:41
int Ssiz_t
Definition: RtypesCore.h:63
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kTRUE
Definition: RtypesCore.h:87
const char Option_t
Definition: RtypesCore.h:62
#define ClassImp(name)
Definition: Rtypes.h:365
void Printf(const char *fmt,...)
Bool_t operator==(const TUri &u1, const TUri &u2)
Implementation of a TUri Equivalence operator that uses syntax-based normalisation see chapter 6....
Definition: TUri.cxx:116
const char *const kURI_unreserved
Definition: TUri.cxx:33
const char *const kURI_pchar
Definition: TUri.cxx:30
An array of TObjects.
Definition: TObjArray.h:37
Int_t GetEntries() const
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:522
TObject * At(Int_t idx) const
Definition: TObjArray.h:166
Collectable string class.
Definition: TObjString.h:28
Mother of all ROOT objects.
Definition: TObject.h:37
TObject & operator=(const TObject &rhs)
TObject assignment operator.
Definition: TObject.h:268
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
Int_t Match(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10, TArrayI *pos=0)
The number of matches is returned, this equals the full match + sub-pattern matches.
Definition: TPRegexp.cxx:339
TObjArray * MatchS(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Returns a TObjArray of matched substrings as TObjString's.
Definition: TPRegexp.cxx:370
Basic string class.
Definition: TString.h:131
Ssiz_t Length() const
Definition: TString.h:405
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:418
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
Definition: TString.cxx:2177
const char * Data() const
Definition: TString.h:364
@ kLeading
Definition: TString.h:262
@ kTrailing
Definition: TString.h:262
Ssiz_t Last(char c) const
Find last occurrence of a character c.
Definition: TString.cxx:892
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1138
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
Bool_t IsNull() const
Definition: TString.h:402
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
This class represents a RFC 3986 compatible URI.
Definition: TUri.h:35
const TString GetRelativePart() const
relative-part = "//" authority path-abempty
Definition: TUri.cxx:676
static Bool_t IsPath(const TString &)
Returns kTRUE if string qualifies as valid path component:
Definition: TUri.cxx:854
Bool_t HasPath() const
Definition: TUri.h:97
Bool_t SetScheme(const TString &scheme)
Set scheme component of URI:
Definition: TUri.cxx:248
Bool_t IsRelative() const
Returns kTRUE if instance qualifies as relative-ref relative-ref = relative-part [ "?...
Definition: TUri.cxx:217
static const TString RemoveDotSegments(const TString &)
This function implements the "remove_dot_segments" routine of chapter 5.2.4 "for interpreting and re...
Definition: TUri.cxx:159
Bool_t SetFragment(const TString &fragment)
Set fragment component of URI:
Definition: TUri.cxx:498
Bool_t SetPath(const TString &path)
Set path component of URI:
Definition: TUri.cxx:481
static Bool_t IsPathAbempty(const TString &)
Returns kTRUE if string qualifies as valid path-abempty component:
Definition: TUri.cxx:870
Bool_t fHasPort
Definition: TUri.h:66
Bool_t SetRelativePart(const TString &)
Returns kTRUE if string qualifies as relative-part:
Definition: TUri.cxx:777
Bool_t IsAbsolute() const
Returns kTRUE if instance qualifies as absolute-URI absolute-URI = scheme ":" hier-part [ "?...
Definition: TUri.cxx:207
Bool_t fHasFragment
Definition: TUri.h:69
const TString GetHost() const
Definition: TUri.h:85
static Bool_t IsFragment(const TString &)
Returns kTRUE if string qualifies as valid fragment component.
Definition: TUri.cxx:516
TString fQuery
Definition: TUri.h:60
Bool_t SetUserInfo(const TString &userinfo)
Set userinfo component of URI:
Definition: TUri.cxx:405
static Bool_t IsQuery(const TString &)
Returns kTRUE if string qualifies as URI query:
Definition: TUri.cxx:320
TString fPort
Definition: TUri.h:58
Bool_t IsUri() const
Returns kTRUE if instance qualifies as URI URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] ...
Definition: TUri.cxx:227
TString fScheme
Definition: TUri.h:55
static const TString PctEncode(const TString &source)
Percent-encode and return the given string according to RFC 3986 in principle, this function cannot f...
Definition: TUri.cxx:815
static const TString PctNormalise(const TString &source)
Normalise the percent-encoded parts of the string i.e.
Definition: TUri.cxx:1057
const TString GetPath() const
Definition: TUri.h:87
const TString GetHierPart() const
Definition: TUri.cxx:660
void Normalise()
Syntax based normalisation according to RFC chapter 6.2.2.
Definition: TUri.cxx:985
static Bool_t IsUserInfo(const TString &)
Returns kTRUE if string qualifies as valid URI userinfo:
Definition: TUri.cxx:428
const TString GetUri() const
Returns the whole URI - an implementation of chapter 5.3 component recomposition.
Definition: TUri.cxx:140
Bool_t fHasHost
Definition: TUri.h:65
const TString GetFragment() const
Definition: TUri.h:89
static Bool_t IsPathRootless(const TString &)
Returns kTRUE if string qualifies as valid path-rootless component:
Definition: TUri.cxx:913
static Bool_t IsHost(const TString &)
Returns kTRUE if string qualifies as valid host component: host = IP-literal / IPv4address / reg-name...
Definition: TUri.cxx:839
static Bool_t IsScheme(const TString &)
Returns kTRUE if string qualifies as URI scheme:
Definition: TUri.cxx:270
const TString GetQuery() const
Definition: TUri.h:88
Bool_t SetAuthority(const TString &authority)
Set authority part of URI:
Definition: TUri.cxx:338
void Print(Option_t *option="") const
Display function,.
Definition: TUri.cxx:528
Bool_t fHasScheme
Definition: TUri.h:63
static TUri Transform(const TUri &reference, const TUri &base)
Transform a URI reference into its target URI using given a base URI.
Definition: TUri.cxx:1122
TString fUserinfo
Definition: TUri.h:56
void Reset()
Initialize this URI object.
Definition: TUri.cxx:571
const TString GetAuthority() const
Returns the authority part of the instance:
Definition: TUri.cxx:283
Bool_t HasFragment() const
Definition: TUri.h:99
static Bool_t IsPort(const TString &)
Returns kTRUE if string qualifies as valid port component:
Definition: TUri.cxx:937
static Bool_t IsAuthority(const TString &)
Returns kTRUE if string qualifies as valid URI authority:
Definition: TUri.cxx:383
Bool_t HasRelativePart() const
Definition: TUri.h:100
static Bool_t IsPathNoscheme(const TString &)
Returns kTRUE if string qualifies as valid path-noscheme component:
Definition: TUri.cxx:900
Bool_t fHasQuery
Definition: TUri.h:68
Bool_t HasQuery() const
Definition: TUri.h:98
static const TString MergePaths(const TUri &reference, const TUri &base)
RFC 3986, 5.2.3.
Definition: TUri.cxx:1180
static Bool_t IsPathAbsolute(const TString &)
Returns kTRUE if string qualifies as valid path-absolute component.
Definition: TUri.cxx:885
static Bool_t IsUnreserved(const TString &string)
Returns kTRUE, if the given string does not contain RFC 3986 reserved characters.
Definition: TUri.cxx:975
Bool_t SetPort(const TString &port)
Set port component of URI:
Definition: TUri.cxx:460
TString fPath
Definition: TUri.h:59
Bool_t SetQuery(const TString &path)
Set query component of URI:
Definition: TUri.cxx:298
Bool_t HasScheme() const
Definition: TUri.h:91
Bool_t HasHierPart() const
Definition: TUri.h:92
TString fFragment
Definition: TUri.h:61
static Bool_t IsHierPart(const TString &)
Returns kTRUE if string qualifies as hier-part:
Definition: TUri.cxx:743
static Bool_t IsRelativePart(const TString &)
Returns kTRUE if string qualifies as relative-part:
Definition: TUri.cxx:760
static Bool_t IsIpv4(const TString &)
Returns kTRUE, if string holds a valid IPv4 address currently only decimal variant supported.
Definition: TUri.cxx:962
Bool_t IsReference() const
Returns kTRUE if instance qualifies as URI-reference URI-reference = URI / relative-ref cf.
Definition: TUri.cxx:237
static Bool_t IsPathEmpty(const TString &)
Returns kTRUE if string qualifies as valid path-empty component:
Definition: TUri.cxx:926
TUri & operator=(const TUri &rhs)
TUri assignment operator.
Definition: TUri.cxx:89
Bool_t SetUri(const TString &uri)
Parse URI and set the member variables accordingly, returns kTRUE if URI validates,...
Definition: TUri.cxx:601
Bool_t SetHost(const TString &host)
Set host component of URI:
Definition: TUri.cxx:442
Bool_t SetHierPart(const TString &hier)
Sets hier-part component of URI
Definition: TUri.cxx:693
TUri()
Definition: TUri.h:73
static const TString PctDecodeUnreserved(const TString &source)
Percent-decode the given string according to chapter 2.1 we assume a valid pct-encoded string.
Definition: TUri.cxx:1012
Bool_t HasAuthority() const
Definition: TUri.h:93
Bool_t fHasPath
Definition: TUri.h:67
Bool_t fHasUserinfo
Definition: TUri.h:64
TString fHost
Definition: TUri.h:57
const TString GetScheme() const
Definition: TUri.h:80
static const TString PctDecode(const TString &source)
Percent-decode the given string according to chapter 2.1 we assume a valid pct-encoded string.
Definition: TUri.cxx:1086
static Bool_t IsRegName(const TString &)
Returns kTRUE if string qualifies as valid reg-name:
Definition: TUri.cxx:950
return c1
Definition: legend1.C:41