Logo ROOT   6.18/05
Reference Guide
TPRegexp.cxx
Go to the documentation of this file.
1// @(#)root/base:$Id$
2// Author: Eddy Offermann 24/06/05
3
4/*************************************************************************
5 * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TPRegexp
13\ingroup Base
14
15C++ Wrapper for the "Perl Compatible Regular Expressions" library
16 The PCRE lib can be found at: http://www.pcre.org/
17
18Extensive documentation about Regular expressions in Perl can be
19found at : http://perldoc.perl.org/perlre.html
20*/
21
22#include "Riostream.h"
23#include "TPRegexp.h"
24#include "TObjArray.h"
25#include "TObjString.h"
26#include "TError.h"
27
28#ifdef R__WIN32
29#define PCRE_STATIC
30#endif
31#include <pcre.h>
32
33#include <vector>
34#include <stdexcept>
35
36struct PCREPriv_t {
37 pcre *fPCRE;
38 pcre_extra *fPCREExtra;
39
40 PCREPriv_t() { fPCRE = 0; fPCREExtra = 0; }
41};
42
43
45
47
48////////////////////////////////////////////////////////////////////////////////
49/// Default ctor.
50
52{
53 fPriv = new PCREPriv_t;
54 fPCREOpts = 0;
55}
56
57////////////////////////////////////////////////////////////////////////////////
58/// Create and initialize with pat.
59
61{
62 fPattern = pat;
63 fPriv = new PCREPriv_t;
64 fPCREOpts = 0;
65}
66
67////////////////////////////////////////////////////////////////////////////////
68/// Copy ctor.
69
71{
73 fPriv = new PCREPriv_t;
75}
76
77////////////////////////////////////////////////////////////////////////////////
78/// Cleanup.
79
81{
82 if (fPriv->fPCRE)
83 pcre_free(fPriv->fPCRE);
84 if (fPriv->fPCREExtra)
85 pcre_free(fPriv->fPCREExtra);
86 delete fPriv;
87}
88
89////////////////////////////////////////////////////////////////////////////////
90/// Assignment operator.
91
93{
94 if (this != &p) {
96 if (fPriv->fPCRE)
97 pcre_free(fPriv->fPCRE);
98 fPriv->fPCRE = 0;
99 if (fPriv->fPCREExtra)
100 pcre_free(fPriv->fPCREExtra);
101 fPriv->fPCREExtra = 0;
103 }
104 return *this;
105}
106
107////////////////////////////////////////////////////////////////////////////////
108/// Translate Perl modifier flags into pcre flags.
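109/// The supported modStr characters are: g, i, m, o, s, x, and the
110/// special d for debug; any other character (including p and c, which are listed below for reference only) is reported as an illegal pattern modifier. The meaning of the letters is: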
111/// - m
112/// Treat string as multiple lines. That is, change "^" and "$" from
113/// matching the start or end of the string to matching the start or
114/// end of any line anywhere within the string.
115/// - s
116/// Treat string as single line. That is, change "." to match any
117/// character whatsoever, even a newline, which normally it would not match.
118/// Used together, as /ms, they let the "." match any character whatsoever,
119/// while still allowing "^" and "$" to match, respectively, just after and
120/// just before newlines within the string.
121/// - i
122/// Do case-insensitive pattern matching.
123/// - x
124/// Extend your pattern's legibility by permitting whitespace and comments.
125/// - p
126/// Preserve the string matched such that ${^PREMATCH}, ${^MATCH},
127/// and ${^POSTMATCH} are available for use after matching.
128/// - g and c
129/// Global matching, and keep the Current position after failed matching.
130/// Unlike i, m, s and x, these two flags affect the way the regex is used
131/// rather than the regex itself. See Using regular expressions in Perl in
132/// perlretut for further explanation of the g and c modifiers.
133/// For more detail see: http://perldoc.perl.org/perlre.html#Modifiers.
134
136{
137 UInt_t opts = 0;
138
139 if (modStr.Length() <= 0)
140 return fPCREOpts;
141
142 //translate perl flags into pcre flags
143 const char *m = modStr;
144 while (*m) {
145 switch (*m) {
146 case 'g':
147 opts |= kPCRE_GLOBAL;
148 break;
149 case 'i':
150 opts |= PCRE_CASELESS;
151 break;
152 case 'm':
153 opts |= PCRE_MULTILINE;
154 break;
155 case 'o':
156 opts |= kPCRE_OPTIMIZE;
157 break;
158 case 's':
159 opts |= PCRE_DOTALL;
160 break;
161 case 'x':
162 opts |= PCRE_EXTENDED;
163 break;
164 case 'd': // special flag to enable debug printing (not Perl compat.)
165 opts |= kPCRE_DEBUG_MSGS;
166 break;
167 default:
168 Error("ParseMods", "illegal pattern modifier: %c", *m);
169 opts = 0;
170 }
171 ++m;
172 }
173 return opts;
174}
175
176////////////////////////////////////////////////////////////////////////////////
177/// Return PCRE modifier options as string.
178/// For meaning of mods see ParseMods().
179
181{
182 TString ret;
183
184 if (fPCREOpts & kPCRE_GLOBAL) ret += 'g';
185 if (fPCREOpts & PCRE_CASELESS) ret += 'i';
186 if (fPCREOpts & PCRE_MULTILINE) ret += 'm';
187 if (fPCREOpts & PCRE_DOTALL) ret += 's';
188 if (fPCREOpts & PCRE_EXTENDED) ret += 'x';
189 if (fPCREOpts & kPCRE_OPTIMIZE) ret += 'o';
190 if (fPCREOpts & kPCRE_DEBUG_MSGS) ret += 'd';
191
192 return ret;
193}
194
195////////////////////////////////////////////////////////////////////////////////
196/// Compile the fPattern.
197
199{
200 if (fPriv->fPCRE)
201 pcre_free(fPriv->fPCRE);
202
204 Info("Compile", "PREGEX compiling %s", fPattern.Data());
205
206 const char *errstr;
207 Int_t patIndex;
208 fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
209 &errstr, &patIndex, 0);
210
211 if (!fPriv->fPCRE) {
213 throw std::runtime_error
214 (TString::Format("TPRegexp::Compile() compilation of TPRegexp(%s) failed at: %d because %s",
215 fPattern.Data(), patIndex, errstr).Data());
216 } else {
217 Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
218 fPattern.Data(), patIndex, errstr);
219 return;
220 }
221 }
222
223 if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
224 Optimize();
225}
226
227////////////////////////////////////////////////////////////////////////////////
228/// Send the pattern through the optimizer.
229
231{
232 if (fPriv->fPCREExtra)
233 pcre_free(fPriv->fPCREExtra);
234
236 Info("Optimize", "PREGEX studying %s", fPattern.Data());
237
238 const char *errstr;
239 // pcre_study allows less options - see pcre_internal.h PUBLIC_STUDY_OPTIONS.
240 fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr);
241
242 if (!fPriv->fPCREExtra && errstr) {
243 Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
244 fPattern.Data(), errstr);
245 }
246}
247
248////////////////////////////////////////////////////////////////////////////////
249/// Returns the number of expanded '$' constructs.
250
252 const TString &replacePattern,
253 Int_t *offVec, Int_t nrMatch) const
254{
255 Int_t nrSubs = 0;
256 const char *p = replacePattern;
257
258 Int_t state = 0;
259 Int_t subnum = 0;
260 while (state != -1) {
261 switch (state) {
262 case 0:
263 if (!*p) {
264 state = -1;
265 break;
266 }
267 if (*p == '$') {
268 state = 1;
269 subnum = 0;
270 if (p[1] == '&') {
271 p++;
272 if (isdigit(p[1]))
273 p++;
274 } else if (!isdigit(p[1])) {
275 Error("ReplaceSubs", "badly formed replacement pattern: %s",
276 replacePattern.Data());
277 }
278 } else
279 final += *p;
280 break;
281 case 1:
282 if (isdigit(*p)) {
283 subnum *= 10;
284 subnum += (*p)-'0';
285 } else {
287 Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
288 if (subnum < 0 || subnum > nrMatch-1) {
289 Error("ReplaceSubs","bad string number: %d",subnum);
290 } else {
291 const TString subStr = s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
292 final += subStr;
293 nrSubs++;
294 }
295 state = 0;
296 continue; // send char to start state
297 }
298 }
299 p++;
300 }
301 return nrSubs;
302}
303
304////////////////////////////////////////////////////////////////////////////////
305/// Perform the actual matching - protected method.
306
308 Int_t nMaxMatch, TArrayI *pos) const
309{
310 Int_t *offVec = new Int_t[3*nMaxMatch];
311 // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
312 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
313 s.Length(), start, 0,
314 offVec, 3*nMaxMatch);
315
316 if (nrMatch == PCRE_ERROR_NOMATCH)
317 nrMatch = 0;
318 else if (nrMatch <= 0) {
319 Error("Match","pcre_exec error = %d", nrMatch);
320 delete [] offVec;
321 return 0;
322 }
323
324 if (pos)
325 pos->Set(2*nrMatch, offVec);
326 delete [] offVec;
327
328 return nrMatch;
329}
330
331////////////////////////////////////////////////////////////////////////////////
332/// The number of matches is returned, this equals the full match +
333/// sub-pattern matches.
334/// nMaxMatch is the maximum allowed number of matches.
335/// pos contains the string indices of the matches. Its usage is
336/// shown in the routine MatchS.
337/// For meaning of mods see ParseMods().
338
339Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start,
340 Int_t nMaxMatch, TArrayI *pos)
341{
342 UInt_t opts = ParseMods(mods);
343
344 if (!fPriv->fPCRE || opts != fPCREOpts) {
345 fPCREOpts = opts;
346 Compile();
347 }
348
349 return MatchInternal(s, start, nMaxMatch, pos);
350}
351
352
353////////////////////////////////////////////////////////////////////////////////
354/// Returns a TObjArray of matched substrings as TObjString's.
355/// The TObjArray is owner of the objects and must be deleted by the user.
356/// The first entry is the full matched pattern, followed by the sub-patterns.
357/// If a pattern was not matched, it will return an empty substring:
358/// ~~~ {.cpp}
359/// TObjArray *subStrL = TPRegexp("(a|(z))(bc)").MatchS("abc");
360/// for (Int_t i = 0; i < subStrL->GetLast()+1; i++) {
361/// const TString subStr = ((TObjString *)subStrL->At(i))->GetString();
362/// std::cout << "\"" << subStr << "\" ";
363/// }
364/// std::cout << std::endl;
365/// ~~~
366/// produces: "abc" "a" "" "bc"
367///
368/// For meaning of mods see ParseMods().
369
371 Int_t start, Int_t nMaxMatch)
372{
373 TArrayI pos;
374 Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);
375
376 TObjArray *subStrL = new TObjArray();
377 subStrL->SetOwner();
378
379 for (Int_t i = 0; i < nrMatch; i++) {
380 Int_t startp = pos[2*i];
381 Int_t stopp = pos[2*i+1];
382 if (startp >= 0 && stopp >= 0) {
383 const TString subStr = s(pos[2*i], pos[2*i+1]-pos[2*i]);
384 subStrL->Add(new TObjString(subStr));
385 } else
386 subStrL->Add(new TObjString());
387 }
388
389 return subStrL;
390}
391
392////////////////////////////////////////////////////////////////////////////////
393/// Perform pattern substitution with optional back-ref replacement
394/// - protected method.
395
397 Int_t start, Int_t nMaxMatch,
398 Bool_t doDollarSubst) const
399{
400 Int_t *offVec = new Int_t[3*nMaxMatch];
401
402 TString final;
403 Int_t nrSubs = 0;
404 Int_t offset = start;
405 Int_t last = 0;
406
407 while (kTRUE) {
408
409 // find next matching subs
410 // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
411 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
412 s.Length(), offset, 0,
413 offVec, 3*nMaxMatch);
414
415 if (nrMatch == PCRE_ERROR_NOMATCH) {
416 nrMatch = 0;
417 break;
418 } else if (nrMatch <= 0) {
419 Error("Substitute", "pcre_exec error = %d", nrMatch);
420 break;
421 }
422
423 // append anything previously unmatched, but not substituted
424 if (last <= offVec[0]) {
425 final += s(last,offVec[0]-last);
426 last = offVec[1];
427 }
428
429 // replace stuff in s
430 if (doDollarSubst) {
431 ReplaceSubs(s, final, replacePattern, offVec, nrMatch);
432 } else {
433 final += replacePattern;
434 }
435 ++nrSubs;
436
437 // if global gotta check match at every pos
438 if (!(fPCREOpts & kPCRE_GLOBAL))
439 break;
440
441 if (offVec[0] != offVec[1])
442 offset = offVec[1];
443 else {
444 // matched empty string
445 if (offVec[1] == s.Length())
446 break;
447 offset = offVec[1]+1;
448 }
449 }
450
451 delete [] offVec;
452
453 final += s(last,s.Length()-last);
454 s = final;
455
456 return nrSubs;
457}
458
459////////////////////////////////////////////////////////////////////////////////
460/// Substitute replaces the string s by a new string in which matching
461/// patterns are replaced by the replacePattern string. The number of
462/// substitutions is returned.
463/// ~~~ {.cpp}
464/// TString s("aap noot mies");
465/// const Int_t nrSub = TPRegexp("(\\w*) noot (\\w*)").Substitute(s,"$2 noot $1");
466/// std::cout << nrSub << " \"" << s << "\"" <<std::endl;
467/// ~~~
468/// produces: 1 "mies noot aap"
469///
470/// For meaning of mods see ParseMods().
471
473 const TString &mods, Int_t start, Int_t nMaxMatch)
474{
475 UInt_t opts = ParseMods(mods);
476
477 if (!fPriv->fPCRE || opts != fPCREOpts) {
478 fPCREOpts = opts;
479 Compile();
480 }
481
482 return SubstituteInternal(s, replacePattern, start, nMaxMatch, kTRUE);
483}
484
485
486////////////////////////////////////////////////////////////////////////////////
487/// Returns true if underlying PCRE structure has been successfully
488/// generated via regexp compilation.
489
491{
492 return fPriv->fPCRE != 0;
493}
494
495////////////////////////////////////////////////////////////////////////////////
496/// Get value of static flag controlling whether exception should be thrown upon an
497/// error during regular expression compilation by the PCRE engine.
498
500{
502}
503
504////////////////////////////////////////////////////////////////////////////////
505/// Set static flag controlling whether exception should be thrown upon an
506/// error during regular expression compilation by the PCRE engine.
507
509{
510 fgThrowAtCompileError = throwp;
511}
512
513////////////////////////////////////////////////////////////////////////////////
514// //
515// TString member functions, put here so the linker will include //
516// them only if regular expressions are used. //
517// //
518////////////////////////////////////////////////////////////////////////////////
519
520////////////////////////////////////////////////////////////////////////////////
521/// Find the first occurrence of the regexp in string and return the position.
522/// Start is the offset at which the search should start.
523
525{
526 TArrayI pos;
527 Int_t nrMatch = r.Match(*this,"",start,10,&pos);
528 if (nrMatch > 0)
529 return pos[0];
530 else
531 return -1;
532}
533
534////////////////////////////////////////////////////////////////////////////////
535/// Find the first occurrence of the regexp in string and return the position.
536/// Extent is length of the matched string and start is the offset at which
537/// the matching should start.
538
540{
541 TArrayI pos;
542 const Int_t nrMatch = r.Match(*this,"",start,10,&pos);
543 if (nrMatch > 0) {
544 *extent = pos[1]-pos[0];
545 return pos[0];
546 } else {
547 *extent = 0;
548 return -1;
549 }
550}
551
552////////////////////////////////////////////////////////////////////////////////
553/// Return the substring found by applying the regexp starting at start.
554
556{
557 Ssiz_t len;
558 Ssiz_t begin = Index(r, &len, start);
559 return TSubString(*this, begin, len);
560}
561
562////////////////////////////////////////////////////////////////////////////////
563/// Return the substring found by applying the regexp.
564
566{
567 return (*this)(r, 0);
568}
569
570
571/** \class TPMERegexp
572
573Wrapper for PCRE library (Perl Compatible Regular Expressions).
574Based on PME - PCRE Made Easy by Zachary Hansen.
575
576Supports main Perl operations using regular expressions (Match,
577Substitute and Split). To retrieve the results one can simply use
578operator[] returning a TString.
579
580See $ROOTSYS/tutorials/regexp_pme.C for examples.
581*/
582
584
585////////////////////////////////////////////////////////////////////////////////
586/// Default constructor. This regexp will match an empty string.
587
589 TPRegexp(),
590 fNMaxMatches(10),
591 fNMatches(0),
592 fAddressOfLastString(0),
593 fLastGlobalPosition(0)
594{
595 Compile();
596}
597
598////////////////////////////////////////////////////////////////////////////////
599/// Constructor.
600///
601/// \param[in] s string to compile into regular expression
602/// \param[in] opts perl-style character flags to be set on TPME object
603/// \param[in] nMatchMax maximum number of matches
604
605TPMERegexp::TPMERegexp(const TString& s, const TString& opts, Int_t nMatchMax) :
606 TPRegexp(s),
607 fNMaxMatches(nMatchMax),
608 fNMatches(0),
609 fAddressOfLastString(0),
610 fLastGlobalPosition(0)
611{
612 fPCREOpts = ParseMods(opts);
613 Compile();
614}
615
616////////////////////////////////////////////////////////////////////////////////
617/// Constructor.
618///
619/// \param[in] s string to compile into regular expression
620/// \param[in] opts PCRE-style option flags to be set on TPME object
621/// \param[in] nMatchMax maximum number of matches
622
623TPMERegexp::TPMERegexp(const TString& s, UInt_t opts, Int_t nMatchMax) :
624 TPRegexp(s),
625 fNMaxMatches(nMatchMax),
626 fNMatches(0),
627 fAddressOfLastString(0),
628 fLastGlobalPosition(0)
629{
630 fPCREOpts = opts;
631 Compile();
632}
633
634////////////////////////////////////////////////////////////////////////////////
635/// Copy constructor.
636/// Only PCRE specifics are copied, not last-match or global-match
637/// information.
638
640 TPRegexp(r),
641 fNMaxMatches(r.fNMaxMatches),
642 fNMatches(0),
643 fAddressOfLastString(0),
644 fLastGlobalPosition(0)
645{
646 Compile();
647}
648
649////////////////////////////////////////////////////////////////////////////////
650/// Reset the pattern and options.
651/// If 'nMatchMax' other than -1 (the default) is passed, it is also set.
652
653void TPMERegexp::Reset(const TString& s, const TString& opts, Int_t nMatchMax)
654{
655 Reset(s, ParseMods(opts), nMatchMax);
656}
657
658////////////////////////////////////////////////////////////////////////////////
659/// Reset the pattern and options.
660/// If 'nMatchMax' other than -1 (the default) is passed, it is also set.
661
662void TPMERegexp::Reset(const TString& s, UInt_t opts, Int_t nMatchMax)
663{
664 fPattern = s;
665 fPCREOpts = opts;
666 Compile();
667
668 if (nMatchMax != -1)
669 fNMatches = nMatchMax;
670 fNMatches = 0;
672}
673
674////////////////////////////////////////////////////////////////////////////////
675/// Copy global-match state from 're' so that this regexp can continue
676/// parsing the string from where 're' left off.
677///
678/// Alternatively, GetGlobalPosition() can be used to retrieve the
679/// last match position so that it can be passed to Match().
680///
681/// Ideally, as it is done in PERL, the last match position would be
682/// stored in the TString itself.
683
685{
688}
689
690////////////////////////////////////////////////////////////////////////////////
691/// Reset state of global match.
692/// This happens automatically when a new string is passed for matching.
693/// But be careful, as the address of the last TString object is used
694/// to make this decision.
695
697{
699}
700
701////////////////////////////////////////////////////////////////////////////////
702/// Runs a match on s against the regex 'this' was created with.
703///
704/// \param[in] s string to match against
705/// \param[in] start offset at which to start matching
706/// \return number of matches found
707
709{
710 // If we got a new string, reset the global position counter.
711 if (fAddressOfLastString != (void*) &s) {
713 }
714
715 if (fPCREOpts & kPCRE_GLOBAL) {
716 start += fLastGlobalPosition;
717 }
718
719 //fprintf(stderr, "string: '%s' length: %d offset: %d\n", s.Data(), s.length(), offset);
721
722 //fprintf(stderr, "MatchInternal_exec result = %d\n", fNMatches);
723
725 fAddressOfLastString = (void*) &s;
726
727 if (fPCREOpts & kPCRE_GLOBAL) {
728 if (fNMatches == PCRE_ERROR_NOMATCH) {
729 // fprintf(stderr, "TPME RESETTING: reset for no match\n");
730 fLastGlobalPosition = 0; // reset the position for next match (perl does this)
731 } else if (fNMatches > 0) {
732 // fprintf(stderr, "TPME RESETTING: setting to %d\n", marks[0].second);
733 fLastGlobalPosition = fMarkers[1]; // set to the end of the match
734 } else {
735 // fprintf(stderr, "TPME RESETTING: reset for no unknown\n");
737 }
738 }
739
740 return fNMatches;
741}
742
743////////////////////////////////////////////////////////////////////////////////
744/// Splits into at most maxfields. If maxfields is unspecified or
745/// 0, trailing empty matches are discarded. If maxfields is
746/// positive, no more than maxfields fields will be returned and
747/// trailing empty matches are preserved. If maxfields is negative,
748/// all fields (including trailing empty ones) are returned. This
749/// *should* be the same as the perl behaviour.
750///
751/// If pattern produces sub-matches, these are also stored in
752/// the result.
753///
754/// A pattern matching the null string will split the value of EXPR
755/// into separate characters at each point it matches that way.
756///
757/// \param[in] s string to split
758/// \param[in] maxfields maximum number of fields to be split out. 0 means
759/// split all fields, but discard any trailing empty bits.
760/// Negative means split all fields and keep trailing empty bits.
761/// Positive means keep up to N fields including any empty fields
762/// less than N. Anything remaining is in the last field.
763/// \return number of fields found
764
766{
767 typedef std::pair<int, int> MarkerLoc_t;
768 typedef std::vector<MarkerLoc_t> MarkerLocVec_t;
769
770 // stores the marks for the split
771 MarkerLocVec_t oMarks;
772
773 // this is a list of current trailing empty matches if maxfields is
774 // unspecified or 0. If there is stuff in it and a non-empty match
775 // is found, then everything in here is pushed into oMarks and then
776 // the new match is pushed on. If the end of the string is reached
777 // and there are empty matches in here, they are discarded.
778 MarkerLocVec_t oCurrentTrailingEmpties;
779
780 Int_t nOffset = 0;
781 Int_t nMatchesFound = 0;
782
783 // while we are still finding matches and maxfields is 0 or negative
784 // (meaning we get all matches), or we haven't gotten to the number
785 // of specified matches
786 Int_t matchRes;
787 while ((matchRes = Match(s, nOffset)) &&
788 ((maxfields < 1) || nMatchesFound < maxfields)) {
789 ++nMatchesFound;
790
791 if (fMarkers[1] - fMarkers[0] == 0) {
792 oMarks.push_back(MarkerLoc_t(nOffset, nOffset + 1));
793 ++nOffset;
794 if (nOffset >= s.Length())
795 break;
796 else
797 continue;
798 }
799
800 // match can be empty
801 if (nOffset != fMarkers[0]) {
802 if (!oCurrentTrailingEmpties.empty()) {
803 oMarks.insert(oMarks.end(),
804 oCurrentTrailingEmpties.begin(),
805 oCurrentTrailingEmpties.end());
806 oCurrentTrailingEmpties.clear();
807 }
808 oMarks.push_back(MarkerLoc_t(nOffset, fMarkers[0]));
809 } else {
810 // empty match
811 if (maxfields == 0) {
812 // store for possible later inclusion
813 oCurrentTrailingEmpties.push_back(MarkerLoc_t(nOffset, nOffset));
814 } else {
815 oMarks.push_back(MarkerLoc_t(nOffset, nOffset));
816 }
817 }
818
819 nOffset = fMarkers[1];
820
821 if (matchRes > 1) {
822 for (Int_t i = 1; i < matchRes; ++i)
823 oMarks.push_back(MarkerLoc_t(fMarkers[2*i], fMarkers[2*i + 1]));
824 }
825 }
826
827
828 // if there were no matches found, push the whole thing on
829 if (nMatchesFound == 0) {
830 oMarks.push_back(MarkerLoc_t(0, s.Length()));
831 }
832 // if we ran out of matches, then append the rest of the string
833 // onto the end of the last split field
834 else if (maxfields > 0 && nMatchesFound >= maxfields) {
835 oMarks[oMarks.size() - 1].second = s.Length();
836 }
837 // else we have to add another entry for the end of the string
838 else {
839 Bool_t last_empty = (nOffset == s.Length());
840 if (!last_empty || maxfields < 0) {
841 if (!oCurrentTrailingEmpties.empty()) {
842 oMarks.insert(oMarks.end(),
843 oCurrentTrailingEmpties.begin(),
844 oCurrentTrailingEmpties.end());
845 }
846 oMarks.push_back(MarkerLoc_t(nOffset, s.Length()));
847 }
848 }
849
850 fNMatches = oMarks.size();
852 for (Int_t i = 0; i < fNMatches; ++i) {
853 fMarkers[2*i] = oMarks[i].first;
854 fMarkers[2*i + 1] = oMarks[i].second;
855 }
856
857 // fprintf(stderr, "match returning %d\n", fNMatches);
858 return fNMatches;
859}
860
861////////////////////////////////////////////////////////////////////////////////
862/// Substitute matching part of s with r, dollar back-ref
863/// substitution is performed if doDollarSubst is true (default).
864/// Returns the number of substitutions made.
865///
866/// After the substitution, another pass is made over the resulting
867/// string and the following special tokens are interpreted:
868/// - `\l` lowercase next char,
869/// - `\u` uppercase next char,
870/// - `\L` lowercase till `\E`,
871/// - `\U` uppercase till `\E`, and
872/// - `\E` end case modification.
873
875{
876 Int_t cnt = SubstituteInternal(s, r, 0, fNMaxMatches, doDollarSubst);
877
878 TString ret;
879 Int_t state = 0;
880 Ssiz_t pos = 0, len = s.Length();
881 const Char_t *data = s.Data();
882 while (pos < len) {
883 Char_t c = data[pos];
884 if (c == '\\') {
885 c = data[pos+1]; // Rely on string-data being null-terminated.
886 switch (c) {
887 case 0 : ret += '\\'; break;
888 case 'l': state = 1; break;
889 case 'u': state = 2; break;
890 case 'L': state = 3; break;
891 case 'U': state = 4; break;
892 case 'E': state = 0; break;
893 default : ret += '\\'; ret += c; break;
894 }
895 pos += 2;
896 } else {
897 switch (state) {
898 case 0: ret += c; break;
899 case 1: ret += (Char_t) tolower(c); state = 0; break;
900 case 2: ret += (Char_t) toupper(c); state = 0; break;
901 case 3: ret += (Char_t) tolower(c); break;
902 case 4: ret += (Char_t) toupper(c); break;
903 default: Error("TPMERegexp::Substitute", "invalid state.");
904 }
905 ++pos;
906 }
907 }
908
909 s = ret;
910
911 return cnt;
912}
913
914////////////////////////////////////////////////////////////////////////////////
915/// Returns the sub-string from the internal fMarkers vector.
916/// Requires having run match or split first.
917
919{
920 if (index >= fNMatches)
921 return "";
922
923 Int_t begin = fMarkers[2*index];
924 Int_t end = fMarkers[2*index + 1];
925 return fLastStringMatched(begin, end-begin);
926}
927
928////////////////////////////////////////////////////////////////////////////////
929/// Print the regular expression and modifier options.
930/// If 'option' contains "all", prints also last string match and
931/// match results.
932
934{
935 TString opt = option;
936 opt.ToLower();
937
938 Printf("Regexp='%s', Opts='%s'", fPattern.Data(), GetModifiers().Data());
939 if (opt.Contains("all")) {
940 Printf(" last string='%s'", fLastStringMatched.Data());
941 Printf(" number of matches = %d", fNMatches);
942 for (Int_t i=0; i<fNMatches; ++i)
943 Printf(" %d - %s", i, operator[](i).Data());
944 }
945}
946
947
948/** \class TStringToken
949Provides iteration through tokens of a given string.
950
951 - fFullStr stores the string to be split. It is never modified.
952 - fSplitRe is the perl-re that is used to separate the tokens.
953 - fReturnVoid if true, empty strings will be returned.
954
955Current token is stored in the TString base-class.
956During construction no match is done, use NextToken() to get the first
957and all subsequent tokens.
958*/
959
961
962////////////////////////////////////////////////////////////////////////////////
963/// Constructor.
964
965TStringToken::TStringToken(const TString& fullStr, const TString& splitRe, Bool_t retVoid) :
966 fFullStr (fullStr),
967 fSplitRe (splitRe),
968 fReturnVoid (retVoid),
969 fPos (0)
970{
971}
972
973////////////////////////////////////////////////////////////////////////////////
974/// Get the next token, it is stored in this TString.
975/// Returns true if new token is available, false otherwise.
976
978{
979 TArrayI x;
980 while (fPos < fFullStr.Length()) {
981 if (fSplitRe.Match(fFullStr, "", fPos, 2, &x)) {
983 fPos = x[1];
984 } else {
986 fPos = fFullStr.Length() + 1;
987 }
988 if (Length() || fReturnVoid)
989 return kTRUE;
990 }
991
992 // Special case: void-strings are requested and the full-string
993 // ends with the separator. Thus we return another empty string.
994 if (fPos == fFullStr.Length() && fReturnVoid) {
996 fPos = fFullStr.Length() + 1;
997 return kTRUE;
998 }
999
1000 return kFALSE;
1001}
ROOT::R::TRInterface & r
Definition: Object.C:4
#define c(i)
Definition: RSha256.hxx:101
int Int_t
Definition: RtypesCore.h:41
int Ssiz_t
Definition: RtypesCore.h:63
char Char_t
Definition: RtypesCore.h:29
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kTRUE
Definition: RtypesCore.h:87
const char Option_t
Definition: RtypesCore.h:62
#define ClassImp(name)
Definition: Rtypes.h:365
void Info(const char *location, const char *msgfmt,...)
void Error(const char *location, const char *msgfmt,...)
void Printf(const char *fmt,...)
Array of integers (32 bits per element).
Definition: TArrayI.h:27
void Set(Int_t n)
Set size of this array to n ints.
Definition: TArrayI.cxx:105
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
An array of TObjects.
Definition: TObjArray.h:37
void Add(TObject *obj)
Definition: TObjArray.h:74
Collectable string class.
Definition: TObjString.h:28
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Definition: TPRegexp.h:97
Int_t fLastGlobalPosition
Definition: TPRegexp.h:110
void ResetGlobalState()
Reset state of global match.
Definition: TPRegexp.cxx:696
Int_t fNMatches
Definition: TPRegexp.h:104
void * fAddressOfLastString
Definition: TPRegexp.h:108
virtual void Print(Option_t *option="")
Print the regular expression and modifier options.
Definition: TPRegexp.cxx:933
Int_t Split(const TString &s, Int_t maxfields=0)
Splits into at most maxfields.
Definition: TPRegexp.cxx:765
TPMERegexp()
Default constructor. This regexp will match an empty string.
Definition: TPRegexp.cxx:588
Int_t Substitute(TString &s, const TString &r, Bool_t doDollarSubst=kTRUE)
Substitute matching part of s with r, dollar back-ref substitution is performed if doDollarSubst is t...
Definition: TPRegexp.cxx:874
TString operator[](Int_t)
Returns the sub-string from the internal fMarkers vector.
Definition: TPRegexp.cxx:918
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Definition: TPRegexp.cxx:708
void Reset(const TString &s, const TString &opts="", Int_t nMatchMax=-1)
Reset the pattern and options.
Definition: TPRegexp.cxx:653
Int_t fNMaxMatches
Definition: TPRegexp.h:103
TArrayI fMarkers
Definition: TPRegexp.h:105
TString fLastStringMatched
Definition: TPRegexp.h:107
void AssignGlobalState(const TPMERegexp &re)
Copy global-match state from 're; so that this regexp can continue parsing the string from where 're'...
Definition: TPRegexp.cxx:684
TPRegexp()
Default ctor.
Definition: TPRegexp.cxx:51
void Compile()
Compile the fPattern.
Definition: TPRegexp.cxx:198
Int_t SubstituteInternal(TString &s, const TString &replace, Int_t start, Int_t nMaxMatch0, Bool_t doDollarSubst) const
Perform pattern substitution with optional back-ref replacement.
Definition: TPRegexp.cxx:396
Bool_t IsValid() const
Returns true if underlying PCRE structure has been successfully generated via regexp compilation.
Definition: TPRegexp.cxx:490
TString fPattern
Definition: TPRegexp.h:46
TPRegexp & operator=(const TPRegexp &p)
Assignment operator.
Definition: TPRegexp.cxx:92
UInt_t ParseMods(const TString &mods) const
Translate Perl modifier flags into pcre flags.
Definition: TPRegexp.cxx:135
UInt_t fPCREOpts
Definition: TPRegexp.h:48
PCREPriv_t * fPriv
Definition: TPRegexp.h:47
Int_t Match(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10, TArrayI *pos=0)
The number of matches is returned, this equals the full match + sub-pattern matches.
Definition: TPRegexp.cxx:339
Int_t MatchInternal(const TString &s, Int_t start, Int_t nMaxMatch, TArrayI *pos=0) const
Perform the actual matching - protected method.
Definition: TPRegexp.cxx:307
TObjArray * MatchS(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Returns a TObjArray of matched substrings as TObjString's.
Definition: TPRegexp.cxx:370
static Bool_t fgThrowAtCompileError
Definition: TPRegexp.h:50
Int_t ReplaceSubs(const TString &s, TString &final, const TString &replacePattern, Int_t *ovec, Int_t nmatch) const
Returns the number of expanded '$' constructs.
Definition: TPRegexp.cxx:251
virtual ~TPRegexp()
Cleanup.
Definition: TPRegexp.cxx:80
Int_t Substitute(TString &s, const TString &replace, const TString &mods="", Int_t start=0, Int_t nMatchMax=10)
Substitute replaces the string s by a new string in which matching patterns are replaced by the repla...
Definition: TPRegexp.cxx:472
TString GetModifiers() const
Return PCRE modifier options as string.
Definition: TPRegexp.cxx:180
static Bool_t GetThrowAtCompileError()
Get value of static flag controlling whether exception should be thrown upon an error during regular ...
Definition: TPRegexp.cxx:499
void Optimize()
Send the pattern through the optimizer.
Definition: TPRegexp.cxx:230
@ kPCRE_GLOBAL
Definition: TPRegexp.h:40
@ kPCRE_OPTIMIZE
Definition: TPRegexp.h:41
@ kPCRE_DEBUG_MSGS
Definition: TPRegexp.h:42
@ kPCRE_INTMASK
Definition: TPRegexp.h:43
static void SetThrowAtCompileError(Bool_t throwp)
Set static flag controlling whether exception should be thrown upon an error during regular expressio...
Definition: TPRegexp.cxx:508
Provides iteration through tokens of a given string.
Definition: TPRegexp.h:143
TStringToken(const TString &fullStr, const TString &splitRe, Bool_t retVoid=kFALSE)
Constructor.
Definition: TPRegexp.cxx:965
TPRegexp fSplitRe
Definition: TPRegexp.h:147
Int_t fPos
Definition: TPRegexp.h:149
const TString fFullStr
Definition: TPRegexp.h:146
Bool_t NextToken()
Get the next token, it is stored in this TString.
Definition: TPRegexp.cxx:977
Bool_t fReturnVoid
Definition: TPRegexp.h:148
Basic string class.
Definition: TString.h:131
Ssiz_t Length() const
Definition: TString.h:405
friend class TSubString
Definition: TString.h:134
char & operator()(Ssiz_t i)
Definition: TString.h:709
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
const char * Data() const
Definition: TString.h:364
TString & operator=(char s)
Assign character c to TString.
Definition: TString.cxx:267
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
A zero length substring is legal.
Definition: TString.h:77
Double_t x[n]
Definition: legend1.C:17
static constexpr double s
const char * cnt
Definition: TXMLSetup.cxx:74
auto * m
Definition: textangle.C:8