Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TPRegexp.cxx
Go to the documentation of this file.
1// @(#)root/base:$Id$
2// Author: Eddy Offermann 24/06/05
3
4/*************************************************************************
5 * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TPRegexp
13\ingroup Base
14
15C++ Wrapper for the "Perl Compatible Regular Expressions" library
16 The PCRE lib can be found at: http://www.pcre.org/
17
18Extensive documentation about Regular expressions in Perl can be
19found at : http://perldoc.perl.org/perlre.html
20*/
21
22#include <iostream>
23#include "TPRegexp.h"
24#include "TObjArray.h"
25#include "TObjString.h"
26#include "TError.h"
27
28#ifdef USE_PCRE2
29#ifdef R__WIN32
30#define PCRE2_STATIC
31#endif
32#define PCRE2_CODE_UNIT_WIDTH 8
33#include <pcre2.h>
34#define PCRE_CASELESS PCRE2_CASELESS
35#define PCRE_MULTILINE PCRE2_MULTILINE
36#define PCRE_DOTALL PCRE2_DOTALL
37#define PCRE_EXTENDED PCRE2_EXTENDED
38#define PCRE_ERROR_NOMATCH PCRE2_ERROR_NOMATCH
39#else
40#ifdef R__WIN32
41#define PCRE_STATIC
42#endif
43#include <pcre.h>
44#endif
45
46#include <vector>
47#include <stdexcept>
48
49struct PCREPriv_t {
50#ifdef USE_PCRE2
52 PCREPriv_t() { fPCRE = nullptr; }
53#else
56 PCREPriv_t() { fPCRE = nullptr; fPCREExtra = nullptr; }
57#endif
58};
59
60
61
63
64////////////////////////////////////////////////////////////////////////////////
65/// Default ctor.
66
68{
69 fPriv = new PCREPriv_t;
70 fPCREOpts = 0;
71}
72
73////////////////////////////////////////////////////////////////////////////////
74/// Create and initialize with pat.
75
77{
78 fPattern = pat;
79 fPriv = new PCREPriv_t;
80 fPCREOpts = 0;
81}
82
83////////////////////////////////////////////////////////////////////////////////
84/// Copy ctor.
85
87{
88 fPattern = p.fPattern;
89 fPriv = new PCREPriv_t;
90 fPCREOpts = p.fPCREOpts;
91}
92
93////////////////////////////////////////////////////////////////////////////////
94/// Cleanup.
95
97{
98#ifdef USE_PCRE2
99 if (fPriv->fPCRE)
101#else
102 if (fPriv->fPCRE)
104 if (fPriv->fPCREExtra)
106#endif
107 delete fPriv;
108}
109
110////////////////////////////////////////////////////////////////////////////////
111/// Assignment operator.
112
114{
115 if (this != &p) {
116 fPattern = p.fPattern;
117#ifdef USE_PCRE2
118 if (fPriv->fPCRE)
120 fPriv->fPCRE = nullptr;
121#else
122 if (fPriv->fPCRE)
124 fPriv->fPCRE = nullptr;
125 if (fPriv->fPCREExtra)
127 fPriv->fPCREExtra = nullptr;
128#endif
129 fPCREOpts = p.fPCREOpts;
130 }
131 return *this;
132}
133
134////////////////////////////////////////////////////////////////////////////////
135/// Translate Perl modifier flags into pcre flags.
136/// The supported modStr characters are: g, i, m, o, s, x, and the
137/// special d for debug. The meaning of the letters is:
138/// - m
139/// Treat string as multiple lines. That is, change "^" and "$" from
140/// matching the start or end of the string to matching the start or
141/// end of any line anywhere within the string.
142/// - s
143/// Treat string as single line. That is, change "." to match any
144/// character whatsoever, even a newline, which normally it would not match.
145/// Used together, as /ms, they let the "." match any character whatsoever,
146/// while still allowing "^" and "$" to match, respectively, just after and
147/// just before newlines within the string.
148/// - i
149/// Do case-insensitive pattern matching.
150/// - x
151/// Extend your pattern's legibility by permitting whitespace and comments.
152/// - p (Perl only; ParseMods() reports it as an illegal pattern modifier)
153/// Preserve the string matched such that ${^PREMATCH}, ${^MATCH},
154/// and ${^POSTMATCH} are available for use after matching.
155/// - g and c (c is Perl only; ParseMods() reports it as an illegal pattern modifier)
156/// Global matching, and keep the Current position after failed matching.
157/// Unlike i, m, s and x, these two flags affect the way the regex is used
158/// rather than the regex itself. See Using regular expressions in Perl in
159/// perlretut for further explanation of the g and c modifiers.
160/// For more detail see: http://perldoc.perl.org/perlre.html#Modifiers.
161
163{
164 UInt_t opts = 0;
165
166 if (modStr.Length() <= 0)
167 return fPCREOpts;
168
169 //translate perl flags into pcre flags
170 const char *m = modStr;
171 while (*m) {
172 switch (*m) {
173 case 'g':
175 break;
176 case 'i':
178 break;
179 case 'm':
181 break;
182 case 'o':
184 break;
185 case 's':
186 opts |= PCRE_DOTALL;
187 break;
188 case 'x':
190 break;
191 case 'd': // special flag to enable debug printing (not Perl compat.)
193 break;
194 default:
195 Error("ParseMods", "illegal pattern modifier: %c", *m);
196 opts = 0;
197 }
198 ++m;
199 }
200 return opts;
201}
202
203////////////////////////////////////////////////////////////////////////////////
204/// Return PCRE modifier options as string.
205/// For meaning of mods see ParseMods().
206
208{
209 TString ret;
210
211 if (fPCREOpts & kPCRE_GLOBAL) ret += 'g';
212 if (fPCREOpts & PCRE_CASELESS) ret += 'i';
213 if (fPCREOpts & PCRE_MULTILINE) ret += 'm';
214 if (fPCREOpts & PCRE_DOTALL) ret += 's';
215 if (fPCREOpts & PCRE_EXTENDED) ret += 'x';
216 if (fPCREOpts & kPCRE_OPTIMIZE) ret += 'o';
217 if (fPCREOpts & kPCRE_DEBUG_MSGS) ret += 'd';
218
219 return ret;
220}
221
222////////////////////////////////////////////////////////////////////////////////
223/// Compile the fPattern.
224
226{
227#ifdef USE_PCRE2
228 if (fPriv->fPCRE)
230#else
231 if (fPriv->fPCRE)
233#endif
234
236 Info("Compile", "PREGEX compiling %s", fPattern.Data());
237
238#ifdef USE_PCRE2
239 int errcode;
243 &errcode, &patIndex, nullptr);
244#else
245 const char *errstr;
248 &errstr, &patIndex, nullptr);
249#endif
250
251 if (!fPriv->fPCRE) {
252#ifdef USE_PCRE2
253 PCRE2_UCHAR errstr[256];
255#endif
257 throw std::runtime_error
258 (TString::Format("TPRegexp::Compile() compilation of TPRegexp(%s) failed at: %d because %s",
259 fPattern.Data(), (int)patIndex, errstr).Data());
260 } else {
261 Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
262 fPattern.Data(), (int)patIndex, errstr);
263 return;
264 }
265 }
266
267#ifndef USE_PCRE2
269 Optimize();
270#endif
271}
272
273////////////////////////////////////////////////////////////////////////////////
274/// Send the pattern through the optimizer.
275
277{
278#ifndef USE_PCRE2
279 if (fPriv->fPCREExtra)
281
283 Info("Optimize", "PREGEX studying %s", fPattern.Data());
284
285 const char *errstr;
286 // pcre_study allows less options - see pcre_internal.h PUBLIC_STUDY_OPTIONS.
288
289 if (!fPriv->fPCREExtra && errstr) {
290 Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
291 fPattern.Data(), errstr);
292 }
293#endif
294}
295
296////////////////////////////////////////////////////////////////////////////////
297/// Returns the number of expanded '$' constructs.
298
300 const TString &replacePattern,
301 Int_t *offVec, Int_t nrMatch) const
302{
303 Int_t nrSubs = 0;
304 const char *p = replacePattern;
305
306 Int_t state = 0;
307 Int_t subnum = 0;
308 while (state != -1) {
309 switch (state) {
310 case 0:
311 if (!*p) {
312 state = -1;
313 break;
314 }
315 if (*p == '$') {
316 state = 1;
317 subnum = 0;
318 if (p[1] == '&') {
319 p++;
320 if (isdigit(p[1]))
321 p++;
322 } else if (!isdigit(p[1])) {
323 Error("ReplaceSubs", "badly formed replacement pattern: %s",
324 replacePattern.Data());
325 }
326 } else
327 final += *p;
328 break;
329 case 1:
330 if (isdigit(*p)) {
331 subnum *= 10;
332 subnum += (*p)-'0';
333 } else {
335 Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
337 Error("ReplaceSubs","bad string number: %d",subnum);
338 } else {
339 const TString subStr = s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
340 final += subStr;
341 nrSubs++;
342 }
343 state = 0;
344 continue; // send char to start state
345 }
346 }
347 p++;
348 }
349 return nrSubs;
350}
351
352////////////////////////////////////////////////////////////////////////////////
353/// Perform the actual matching - protected method.
354
356 Int_t nMaxMatch, TArrayI *pos) const
357{
358 Int_t *offVec = new Int_t[3*nMaxMatch];
359
360#ifdef USE_PCRE2
364 s.Length(), start, 0,
365 match_data, nullptr);
366#else
367 // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
369 s.Length(), start, 0,
370 offVec, 3*nMaxMatch);
371#endif
372
374 nrMatch = 0;
375 else if (nrMatch <= 0) {
376 Error("Match","pcre_exec error = %d", nrMatch);
377#ifdef USE_PCRE2
379#endif
380 delete [] offVec;
381 return 0;
382 }
383
384 if (pos) {
385#ifdef USE_PCRE2
387 for (int i = 0; i < 2 * nrMatch; ++i)
388 offVec[i] = oVec[i];
389#endif
390 pos->Set(2*nrMatch, offVec);
391 }
392
393#ifdef USE_PCRE2
395#endif
396 delete [] offVec;
397
398 return nrMatch;
399}
400
401////////////////////////////////////////////////////////////////////////////////
402/// The number of matches is returned, this equals the full match +
403/// sub-pattern matches.
404/// nMaxMatch is the maximum allowed number of matches.
405/// pos contains the string indices of the matches. Its usage is
406/// shown in the routine MatchS.
407/// For meaning of mods see ParseMods().
408
410 Int_t nMaxMatch, TArrayI *pos)
411{
413
414 if (!fPriv->fPCRE || opts != fPCREOpts) {
415 fPCREOpts = opts;
416 Compile();
417 }
418
419 return MatchInternal(s, start, nMaxMatch, pos);
420}
421
422
423////////////////////////////////////////////////////////////////////////////////
424/// Returns a TObjArray of matched substrings as TObjString's.
425/// The TObjArray is owner of the objects and must be deleted by the user.
426/// The first entry is the full matched pattern, followed by the sub-patterns.
427/// If a pattern was not matched, it will return an empty substring:
428/// ~~~ {.cpp}
429/// TObjArray *subStrL = TPRegexp("(a|(z))(bc)").MatchS("abc");
430/// for (Int_t i = 0; i < subStrL->GetLast()+1; i++) {
431/// const TString subStr = ((TObjString *)subStrL->At(i))->GetString();
432/// std::cout << "\"" << subStr << "\" ";
433/// }
434/// std::cout << std::endl;
435/// ~~~
436/// produces: "abc" "a" "" "bc"
437///
438/// For meaning of mods see ParseMods().
439
441 Int_t start, Int_t nMaxMatch)
442{
443 TArrayI pos;
444 Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);
445
446 TObjArray *subStrL = new TObjArray();
447 subStrL->SetOwner();
448
449 for (Int_t i = 0; i < nrMatch; i++) {
450 Int_t startp = pos[2*i];
451 Int_t stopp = pos[2*i+1];
452 if (startp >= 0 && stopp >= 0) {
453 const TString subStr = s(pos[2*i], pos[2*i+1]-pos[2*i]);
454 subStrL->Add(new TObjString(subStr));
455 } else
456 subStrL->Add(new TObjString());
457 }
458
459 return subStrL;
460}
461
462////////////////////////////////////////////////////////////////////////////////
463/// Perform pattern substitution with optional back-ref replacement
464/// - protected method.
465
467 Int_t start, Int_t nMaxMatch,
468 Bool_t doDollarSubst) const
469{
470 Int_t *offVec = new Int_t[3*nMaxMatch];
471
472 TString fin;
473 Int_t nrSubs = 0;
474 Int_t offset = start;
475 Int_t last = 0;
476
477#ifdef USE_PCRE2
480#endif
481
482 while (kTRUE) {
483
484 // find next matching subs
485 // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
486#ifdef USE_PCRE2
488 s.Length(), offset, 0,
489 match_data, nullptr);
490#else
492 s.Length(), offset, 0,
493 offVec, 3*nMaxMatch);
494#endif
495
497 break;
498 } else if (nrMatch <= 0) {
499 Error("Substitute", "pcre_exec error = %d", nrMatch);
500 break;
501 }
502
503#ifdef USE_PCRE2
505 for (int i = 0; i < 2 * nrMatch; ++i)
506 offVec[i] = oVec[i];
507#endif
508
509 // append anything previously unmatched, but not substituted
510 if (last <= offVec[0]) {
511 fin += s(last,offVec[0]-last);
512 last = offVec[1];
513 }
514
515 // replace stuff in s
516 if (doDollarSubst) {
518 } else {
520 }
521 ++nrSubs;
522
523 // if global gotta check match at every pos
524 if (!(fPCREOpts & kPCRE_GLOBAL))
525 break;
526
527 if (offVec[0] != offVec[1]) {
528 offset = offVec[1];
529 } else {
530 // matched empty string
531 if (offVec[1] == s.Length()) break;
532 offset = offVec[1]+1;
533 }
534 }
535
536#ifdef USE_PCRE2
538#endif
539 delete [] offVec;
540
541 fin += s(last,s.Length()-last);
542 s = fin;
543
544 return nrSubs;
545}
546
547////////////////////////////////////////////////////////////////////////////////
548/// Substitute replaces the string s by a new string in which matching
549/// patterns are replaced by the replacePattern string. The number of
550/// substitutions is returned.
551/// ~~~ {.cpp}
552/// TString s("aap noot mies");
553/// const Int_t nrSub = TPRegexp("(\\w*) noot (\\w*)").Substitute(s,"$2 noot $1");
554/// std::cout << nrSub << " \"" << s << "\"" <<std::endl;
555/// ~~~
556/// produces: 2 "mies noot aap"
557///
558/// For meaning of mods see ParseMods().
559
561 const TString &mods, Int_t start, Int_t nMaxMatch)
562{
564
565 if (!fPriv->fPCRE || opts != fPCREOpts) {
566 fPCREOpts = opts;
567 Compile();
568 }
569
571}
572
573
574////////////////////////////////////////////////////////////////////////////////
575/// Returns true if underlying PCRE structure has been successfully
576/// generated via regexp compilation.
577
579{
580 return fPriv->fPCRE != nullptr;
581}
582
583////////////////////////////////////////////////////////////////////////////////
584/// Get value of static flag controlling whether exception should be thrown upon an
585/// error during regular expression compilation by the PCRE engine.
586
591
592////////////////////////////////////////////////////////////////////////////////
593/// Set static flag controlling whether exception should be thrown upon an
594/// error during regular expression compilation by the PCRE engine.
595
600
601////////////////////////////////////////////////////////////////////////////////
602// //
603// TString member functions, put here so the linker will include //
604// them only if regular expressions are used. //
605// //
606////////////////////////////////////////////////////////////////////////////////
607
608////////////////////////////////////////////////////////////////////////////////
609/// Find the first occurrence of the regexp in string and return the position.
610/// Start is the offset at which the search should start.
611
613{
614 TArrayI pos;
615 Int_t nrMatch = r.Match(*this,"",start,10,&pos);
616 if (nrMatch > 0)
617 return pos[0];
618 else
619 return -1;
620}
621
622////////////////////////////////////////////////////////////////////////////////
623/// Find the first occurrence of the regexp in string and return the position.
624/// Extent is length of the matched string and start is the offset at which
625/// the matching should start.
626
628{
629 TArrayI pos;
630 const Int_t nrMatch = r.Match(*this,"",start,10,&pos);
631 if (nrMatch > 0) {
632 *extent = pos[1]-pos[0];
633 return pos[0];
634 } else {
635 *extent = 0;
636 return -1;
637 }
638}
639
640////////////////////////////////////////////////////////////////////////////////
641/// Return the substring found by applying the regexp starting at start.
642
644{
645 Ssiz_t len;
646 Ssiz_t begin = Index(r, &len, start);
647 return TSubString(*this, begin, len);
648}
649
650////////////////////////////////////////////////////////////////////////////////
651/// Return the substring found by applying the regexp.
652
654{
655 return (*this)(r, 0);
656}
657
658
659/** \class TPMERegexp
660
661Wrapper for PCRE library (Perl Compatible Regular Expressions).
662Based on PME - PCRE Made Easy by Zachary Hansen.
663
664Supports main Perl operations using regular expressions (Match,
665Substitute and Split). To retrieve the results one can simply use
666operator[] returning a TString.
667
668See regexp_pme.C for example.
669*/
670
671
672////////////////////////////////////////////////////////////////////////////////
673/// Default constructor. This regexp will match an empty string.
674
676 TPRegexp(),
677 fNMaxMatches(10),
678 fNMatches(0),
679 fAddressOfLastString(nullptr),
680 fLastGlobalPosition(0)
681{
682 Compile();
683}
684
685////////////////////////////////////////////////////////////////////////////////
686/// Constructor.
687///
688/// \param[in] s string to compile into regular expression
689/// \param[in] opts perl-style character flags to be set on TPME object
690/// \param[in] nMatchMax maximum number of matches
691
693 TPRegexp(s),
694 fNMaxMatches(nMatchMax),
695 fNMatches(0),
696 fAddressOfLastString(nullptr),
697 fLastGlobalPosition(0)
698{
700 Compile();
701}
702
703////////////////////////////////////////////////////////////////////////////////
704/// Constructor.
705///
706/// \param[in] s string to compile into regular expression
707/// \param[in] opts PCRE-style option flags to be set on TPME object
708/// \param[in] nMatchMax maximum number of matches
709
711 TPRegexp(s),
712 fNMaxMatches(nMatchMax),
713 fNMatches(0),
714 fAddressOfLastString(nullptr),
715 fLastGlobalPosition(0)
716{
717 fPCREOpts = opts;
718 Compile();
719}
720
721////////////////////////////////////////////////////////////////////////////////
722/// Copy constructor.
723/// Only PCRE specifics are copied, not last-match or global-match
724/// information.
725
727 TPRegexp(r),
728 fNMaxMatches(r.fNMaxMatches),
729 fNMatches(0),
730 fAddressOfLastString(nullptr),
731 fLastGlobalPosition(0)
732{
733 Compile();
734}
735
736////////////////////////////////////////////////////////////////////////////////
737/// Reset the pattern and options.
738/// If 'nMatchMax' other than -1 (the default) is passed, it is also set.
739
741{
743}
744
745////////////////////////////////////////////////////////////////////////////////
746/// Reset the pattern and options.
747/// If 'nMatchMax' other than -1 (the default) is passed, it is also set.
748
750{
751 fPattern = s;
752 fPCREOpts = opts;
753 Compile();
754
755 if (nMatchMax != -1)
757 fNMatches = 0;
759}
760
761////////////////////////////////////////////////////////////////////////////////
762/// Copy global-match state from 're' so that this regexp can continue
763/// parsing the string from where 're' left off.
764///
765/// Alternatively, GetGlobalPosition() can be used to retrieve the
766/// last match position so that it can be passed to Match().
767///
768/// Ideally, as it is done in PERL, the last match position would be
769/// stored in the TString itself.
770
772{
773 fLastStringMatched = re.fLastStringMatched;
774 fLastGlobalPosition = re.fLastGlobalPosition;
775}
776
777////////////////////////////////////////////////////////////////////////////////
778/// Reset state of global match.
779/// This happens automatically when a new string is passed for matching.
780/// But be careful, as the address of the last TString object is used
781/// to make this decision.
782
787
788////////////////////////////////////////////////////////////////////////////////
789/// Runs a match on s against the regex 'this' was created with.
790///
791/// \param[in] s string to match against
792/// \param[in] start offset at which to start matching
793/// \return number of matches found
794
796{
797 // If we got a new string, reset the global position counter.
798 if (fAddressOfLastString != (void*) &s) {
800 }
801
802 if (fPCREOpts & kPCRE_GLOBAL) {
803 start += fLastGlobalPosition;
804 }
805
806 //fprintf(stderr, "string: '%s' length: %d offset: %d\n", s.Data(), s.length(), offset);
808
809 //fprintf(stderr, "MatchInternal_exec result = %d\n", fNMatches);
810
812 fAddressOfLastString = (void*) &s;
813
814 if (fPCREOpts & kPCRE_GLOBAL) {
816 // fprintf(stderr, "TPME RESETTING: reset for no match\n");
817 fLastGlobalPosition = 0; // reset the position for next match (perl does this)
818 } else if (fNMatches > 0) {
819 // fprintf(stderr, "TPME RESETTING: setting to %d\n", marks[0].second);
820 fLastGlobalPosition = fMarkers[1]; // set to the end of the match
821 } else {
822 // fprintf(stderr, "TPME RESETTING: reset for no unknown\n");
824 }
825 }
826
827 return fNMatches;
828}
829
830////////////////////////////////////////////////////////////////////////////////
831/// Splits into at most maxfields. If maxfields is unspecified or
832/// 0, trailing empty matches are discarded. If maxfields is
833/// positive, no more than maxfields fields will be returned and
834/// trailing empty matches are preserved. If maxfields is negative,
835/// all fields (including trailing empty ones) are returned. This
836/// *should* be the same as the perl behaviour.
837///
838/// If pattern produces sub-matches, these are also stored in
839/// the result.
840///
841/// A pattern matching the null string will split the value of EXPR
842/// into separate characters at each point it matches that way.
843///
844/// \param[in] s string to split
845/// \param[in] maxfields maximum number of fields to be split out. 0 means
846/// split all fields, but discard any trailing empty bits.
847/// Negative means split all fields and keep trailing empty bits.
848/// Positive means keep up to N fields including any empty fields
849/// less than N. Anything remaining is in the last field.
850/// \return number of fields found
851
853{
854 typedef std::pair<int, int> MarkerLoc_t;
855 typedef std::vector<MarkerLoc_t> MarkerLocVec_t;
856
857 // stores the marks for the split
859
860 // this is a list of current trailing empty matches if maxfields is
861 // unspecified or 0. If there is stuff in it and a non-empty match
862 // is found, then everything in here is pushed into oMarks and then
863 // the new match is pushed on. If the end of the string is reached
864 // and there are empty matches in here, they are discarded.
866
867 Int_t nOffset = 0;
869
870 // while we are still finding matches and maxfields is 0 or negative
871 // (meaning we get all matches), or we haven't gotten to the number
872 // of specified matches
874 while ((matchRes = Match(s, nOffset)) &&
875 ((maxfields < 1) || nMatchesFound < maxfields)) {
877
878 if (fMarkers[1] - fMarkers[0] == 0) {
879 oMarks.push_back(MarkerLoc_t(nOffset, nOffset + 1));
880 ++nOffset;
881 if (nOffset >= s.Length())
882 break;
883 else
884 continue;
885 }
886
887 // match can be empty
888 if (nOffset != fMarkers[0]) {
889 if (!oCurrentTrailingEmpties.empty()) {
890 oMarks.insert(oMarks.end(),
894 }
895 oMarks.push_back(MarkerLoc_t(nOffset, fMarkers[0]));
896 } else {
897 // empty match
898 if (maxfields == 0) {
899 // store for possible later inclusion
901 } else {
902 oMarks.push_back(MarkerLoc_t(nOffset, nOffset));
903 }
904 }
905
906 nOffset = fMarkers[1];
907
908 if (matchRes > 1) {
909 for (Int_t i = 1; i < matchRes; ++i)
910 oMarks.push_back(MarkerLoc_t(fMarkers[2*i], fMarkers[2*i + 1]));
911 }
912 }
913
914
915 // if there were no matches found, push the whole thing on
916 if (nMatchesFound == 0) {
917 oMarks.push_back(MarkerLoc_t(0, s.Length()));
918 }
919 // if we ran out of matches, then append the rest of the string
920 // onto the end of the last split field
921 else if (maxfields > 0 && nMatchesFound >= maxfields) {
922 oMarks[oMarks.size() - 1].second = s.Length();
923 }
924 // else we have to add another entry for the end of the string
925 else {
926 Bool_t last_empty = (nOffset == s.Length());
927 if (!last_empty || maxfields < 0) {
928 if (!oCurrentTrailingEmpties.empty()) {
929 oMarks.insert(oMarks.end(),
932 }
933 oMarks.push_back(MarkerLoc_t(nOffset, s.Length()));
934 }
935 }
936
937 fNMatches = oMarks.size();
939 for (Int_t i = 0; i < fNMatches; ++i) {
940 fMarkers[2*i] = oMarks[i].first;
941 fMarkers[2*i + 1] = oMarks[i].second;
942 }
943
944 // fprintf(stderr, "match returning %d\n", fNMatches);
945 return fNMatches;
946}
947
948////////////////////////////////////////////////////////////////////////////////
949/// Substitute matching part of s with r, dollar back-ref
950/// substitution is performed if doDollarSubst is true (default).
951/// Returns the number of substitutions made.
952///
953/// After the substitution, another pass is made over the resulting
954/// string and the following special tokens are interpreted:
955/// - `\l` lowercase next char,
956/// - `\u` uppercase next char,
957/// - `\L` lowercase till `\E`,
958/// - `\U` uppercase till `\E`, and
959/// - `\E` end case modification.
960
962{
964
965 TString ret;
966 Int_t state = 0;
967 Ssiz_t pos = 0, len = s.Length();
968 const Char_t *data = s.Data();
969 while (pos < len) {
970 Char_t c = data[pos];
971 if (c == '\\') {
972 c = data[pos+1]; // Rely on string-data being null-terminated.
973 switch (c) {
974 case 0 : ret += '\\'; break;
975 case 'l': state = 1; break;
976 case 'u': state = 2; break;
977 case 'L': state = 3; break;
978 case 'U': state = 4; break;
979 case 'E': state = 0; break;
980 default : ret += '\\'; ret += c; break;
981 }
982 pos += 2;
983 } else {
984 switch (state) {
985 case 0: ret += c; break;
986 case 1: ret += (Char_t) tolower(c); state = 0; break;
987 case 2: ret += (Char_t) toupper(c); state = 0; break;
988 case 3: ret += (Char_t) tolower(c); break;
989 case 4: ret += (Char_t) toupper(c); break;
990 default: Error("TPMERegexp::Substitute", "invalid state.");
991 }
992 ++pos;
993 }
994 }
995
996 s = ret;
997
998 return cnt;
999}
1000
1001////////////////////////////////////////////////////////////////////////////////
1002/// Returns the sub-string from the internal fMarkers vector.
1003/// Requires having run match or split first.
1004
1006{
1007 if (index >= fNMatches)
1008 return "";
1009
1010 Int_t begin = fMarkers[2*index];
1011 Int_t end = fMarkers[2*index + 1];
1012 return fLastStringMatched(begin, end-begin);
1013}
1014
1015////////////////////////////////////////////////////////////////////////////////
1016/// Print the regular expression and modifier options.
1017/// If 'option' contains "all", prints also last string match and
1018/// match results.
1019
1021{
1022 TString opt = option;
1023 opt.ToLower();
1024
1025 Printf("Regexp='%s', Opts='%s'", fPattern.Data(), GetModifiers().Data());
1026 if (opt.Contains("all")) {
1027 Printf(" last string='%s'", fLastStringMatched.Data());
1028 Printf(" number of matches = %d", fNMatches);
1029 for (Int_t i=0; i<fNMatches; ++i)
1030 Printf(" %d - %s", i, operator[](i).Data());
1031 }
1032}
1033
1034
1035/** \class TStringToken
1036Provides iteration through tokens of a given string.
1037
1038 - fFullStr stores the string to be split. It is never modified.
1039 - fSplitRe is the perl-re that is used to separate the tokens.
1040 - fReturnVoid if true, empty strings will be returned.
1041
1042Current token is stored in the TString base-class.
1043During construction no match is done, use NextToken() to get the first
1044and all subsequent tokens.
1045*/
1046
1047
1048////////////////////////////////////////////////////////////////////////////////
1049/// Constructor.
1050
1052 fFullStr (fullStr),
1053 fSplitRe (splitRe),
1054 fReturnVoid (retVoid),
1055 fPos (0)
1056{
1057}
1058
1059////////////////////////////////////////////////////////////////////////////////
1060/// Get the next token, it is stored in this TString.
1061/// Returns true if new token is available, false otherwise.
1062
1064{
1065 TArrayI x;
1066 while (fPos < fFullStr.Length()) {
1067 if (fSplitRe.Match(fFullStr, "", fPos, 2, &x)) {
1069 fPos = x[1];
1070 } else {
1072 fPos = fFullStr.Length() + 1;
1073 }
1074 if (Length() || fReturnVoid)
1075 return kTRUE;
1076 }
1077
1078 // Special case: void-strings are requested and the full-string
1079 // ends with the separator. Thus we return another empty string.
1080 if (fPos == fFullStr.Length() && fReturnVoid) {
1082 fPos = fFullStr.Length() + 1;
1083 return kTRUE;
1084 }
1085
1086 return kFALSE;
1087}
#define c(i)
Definition RSha256.hxx:101
bool Bool_t
Boolean (0=false, 1=true) (bool)
Definition RtypesCore.h:77
char Char_t
Character 1 byte (char)
Definition RtypesCore.h:51
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
const char Option_t
Option string (const char)
Definition RtypesCore.h:80
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
Definition TError.cxx:241
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:208
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t option
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
void Printf(const char *fmt,...)
Formats a string in a circular formatting buffer and prints the string.
Definition TString.cxx:2509
const_iterator begin() const
const_iterator end() const
Array of integers (32 bits per element).
Definition TArrayI.h:27
void Set(Int_t n) override
Set size of this array to n ints.
Definition TArrayI.cxx:104
An array of TObjects.
Definition TObjArray.h:31
Collectable string class.
Definition TObjString.h:28
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Definition TPRegexp.h:97
Int_t fLastGlobalPosition
Definition TPRegexp.h:110
void ResetGlobalState()
Reset state of global match.
Definition TPRegexp.cxx:783
Int_t fNMatches
Definition TPRegexp.h:104
void * fAddressOfLastString
Definition TPRegexp.h:108
virtual void Print(Option_t *option="")
Print the regular expression and modifier options.
Int_t Split(const TString &s, Int_t maxfields=0)
Splits into at most maxfields.
Definition TPRegexp.cxx:852
TPMERegexp()
Default constructor. This regexp will match an empty string.
Definition TPRegexp.cxx:675
Int_t Substitute(TString &s, const TString &r, Bool_t doDollarSubst=kTRUE)
Substitute matching part of s with r, dollar back-ref substitution is performed if doDollarSubst is t...
Definition TPRegexp.cxx:961
TString operator[](Int_t)
Returns the sub-string from the internal fMarkers vector.
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Definition TPRegexp.cxx:795
void Reset(const TString &s, const TString &opts="", Int_t nMatchMax=-1)
Reset the pattern and options.
Definition TPRegexp.cxx:740
Int_t fNMaxMatches
Definition TPRegexp.h:103
TArrayI fMarkers
Definition TPRegexp.h:105
TString fLastStringMatched
Definition TPRegexp.h:107
void AssignGlobalState(const TPMERegexp &re)
Copy global-match state from 're' so that this regexp can continue parsing the string from where 're'...
Definition TPRegexp.cxx:771
TPRegexp()
Default ctor.
Definition TPRegexp.cxx:67
void Compile()
Compile the fPattern.
Definition TPRegexp.cxx:225
Int_t SubstituteInternal(TString &s, const TString &replace, Int_t start, Int_t nMaxMatch0, Bool_t doDollarSubst) const
Perform pattern substitution with optional back-ref replacement.
Definition TPRegexp.cxx:466
Bool_t IsValid() const
Returns true if underlying PCRE structure has been successfully generated via regexp compilation.
Definition TPRegexp.cxx:578
TString fPattern
Definition TPRegexp.h:46
TPRegexp & operator=(const TPRegexp &p)
Assignment operator.
Definition TPRegexp.cxx:113
UInt_t ParseMods(const TString &mods) const
Translate Perl modifier flags into pcre flags.
Definition TPRegexp.cxx:162
UInt_t fPCREOpts
Definition TPRegexp.h:48
PCREPriv_t * fPriv
Definition TPRegexp.h:47
Int_t Match(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10, TArrayI *pos=nullptr)
Returns the number of matches, which equals the full match plus the sub-pattern matches.
Definition TPRegexp.cxx:409
TObjArray * MatchS(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Returns a TObjArray of the matched substrings, stored as TObjString objects.
Definition TPRegexp.cxx:440
static Bool_t fgThrowAtCompileError
Definition TPRegexp.h:50
Int_t ReplaceSubs(const TString &s, TString &final, const TString &replacePattern, Int_t *ovec, Int_t nmatch) const
Returns the number of expanded '$' constructs.
Definition TPRegexp.cxx:299
virtual ~TPRegexp()
Cleanup.
Definition TPRegexp.cxx:96
Int_t Substitute(TString &s, const TString &replace, const TString &mods="", Int_t start=0, Int_t nMatchMax=10)
Substitute replaces the string s by a new string in which matching patterns are replaced by the repla...
Definition TPRegexp.cxx:560
Int_t MatchInternal(const TString &s, Int_t start, Int_t nMaxMatch, TArrayI *pos=nullptr) const
Perform the actual matching - protected method.
Definition TPRegexp.cxx:355
TString GetModifiers() const
Return PCRE modifier options as string.
Definition TPRegexp.cxx:207
static Bool_t GetThrowAtCompileError()
Get value of static flag controlling whether exception should be thrown upon an error during regular ...
Definition TPRegexp.cxx:587
void Optimize()
Send the pattern through the optimizer.
Definition TPRegexp.cxx:276
@ kPCRE_GLOBAL
Definition TPRegexp.h:40
@ kPCRE_OPTIMIZE
Definition TPRegexp.h:41
@ kPCRE_DEBUG_MSGS
Definition TPRegexp.h:42
@ kPCRE_INTMASK
Definition TPRegexp.h:43
static void SetThrowAtCompileError(Bool_t throwp)
Set static flag controlling whether exception should be thrown upon an error during regular expressio...
Definition TPRegexp.cxx:596
TStringToken(const TString &fullStr, const TString &splitRe, Bool_t retVoid=kFALSE)
Constructor.
TPRegexp fSplitRe
Definition TPRegexp.h:147
const TString fFullStr
Definition TPRegexp.h:146
Bool_t NextToken()
Get the next token, it is stored in this TString.
Bool_t fReturnVoid
Definition TPRegexp.h:148
Basic string class.
Definition TString.h:138
Ssiz_t Length() const
Definition TString.h:425
friend class TSubString
Definition TString.h:141
char & operator()(Ssiz_t i)
Definition TString.h:732
void ToLower()
Change string to lower-case.
Definition TString.cxx:1189
const char * Data() const
Definition TString.h:384
TString & operator=(char s)
Assign character s to this TString.
Definition TString.cxx:308
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:640
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:659
A zero length substring is legal.
Definition TString.h:84
Double_t x[n]
Definition legend1.C:17
pcre_extra * fPCREExtra
Definition TPRegexp.cxx:55
pcre * fPCRE
Definition TPRegexp.cxx:54
TMarker m
Definition textangle.C:8