Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TPRegexp.cxx
Go to the documentation of this file.
1// @(#)root/base:$Id$
2// Author: Eddy Offermann 24/06/05
3
4/*************************************************************************
5 * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TPRegexp
13\ingroup Base
14
15C++ Wrapper for the "Perl Compatible Regular Expressions" library
16 The PCRE lib can be found at: http://www.pcre.org/
17
18Extensive documentation about Regular expressions in Perl can be
19found at : http://perldoc.perl.org/perlre.html
20*/
21
22#include <iostream>
23#include "TPRegexp.h"
24#include "TObjArray.h"
25#include "TObjString.h"
26#include "TError.h"
27
28#ifdef USE_PCRE2
29#ifdef R__WIN32
30#define PCRE2_STATIC
31#endif
32#define PCRE2_CODE_UNIT_WIDTH 8
33#include <pcre2.h>
34#define PCRE_CASELESS PCRE2_CASELESS
35#define PCRE_MULTILINE PCRE2_MULTILINE
36#define PCRE_DOTALL PCRE2_DOTALL
37#define PCRE_EXTENDED PCRE2_EXTENDED
38#define PCRE_ERROR_NOMATCH PCRE2_ERROR_NOMATCH
39#else
40#ifdef R__WIN32
41#define PCRE_STATIC
42#endif
43#include <pcre.h>
44#endif
45
46#include <vector>
47#include <stdexcept>
48
49struct PCREPriv_t {
50#ifdef USE_PCRE2
51 pcre2_code *fPCRE;
52 PCREPriv_t() { fPCRE = nullptr; }
53#else
54 pcre *fPCRE;
55 pcre_extra *fPCREExtra;
56 PCREPriv_t() { fPCRE = nullptr; fPCREExtra = nullptr; }
57#endif
58};
59
60
62
64
65////////////////////////////////////////////////////////////////////////////////
66/// Default ctor.
67
69{
70 fPriv = new PCREPriv_t;
71 fPCREOpts = 0;
72}
73
74////////////////////////////////////////////////////////////////////////////////
75/// Create and initialize with pat.
76
78{
79 fPattern = pat;
80 fPriv = new PCREPriv_t;
81 fPCREOpts = 0;
82}
83
84////////////////////////////////////////////////////////////////////////////////
85/// Copy ctor.
86
88{
89 fPattern = p.fPattern;
90 fPriv = new PCREPriv_t;
91 fPCREOpts = p.fPCREOpts;
92}
93
94////////////////////////////////////////////////////////////////////////////////
95/// Cleanup.
96
98{
99#ifdef USE_PCRE2
100 if (fPriv->fPCRE)
101 pcre2_code_free(fPriv->fPCRE);
102#else
103 if (fPriv->fPCRE)
104 pcre_free(fPriv->fPCRE);
105 if (fPriv->fPCREExtra)
106 pcre_free(fPriv->fPCREExtra);
107#endif
108 delete fPriv;
109}
110
111////////////////////////////////////////////////////////////////////////////////
112/// Assignment operator.
113
115{
116 if (this != &p) {
117 fPattern = p.fPattern;
118#ifdef USE_PCRE2
119 if (fPriv->fPCRE)
120 pcre2_code_free(fPriv->fPCRE);
121 fPriv->fPCRE = nullptr;
122#else
123 if (fPriv->fPCRE)
124 pcre_free(fPriv->fPCRE);
125 fPriv->fPCRE = nullptr;
126 if (fPriv->fPCREExtra)
127 pcre_free(fPriv->fPCREExtra);
128 fPriv->fPCREExtra = nullptr;
129#endif
130 fPCREOpts = p.fPCREOpts;
131 }
132 return *this;
133}
134
135////////////////////////////////////////////////////////////////////////////////
136/// Translate Perl modifier flags into pcre flags.
137/// The supported modStr characters are: g, i, m, o, s, x, and the
138/// special d for debug; any other character (including the Perl modifiers p and c listed below) is reported as an error. The meaning of the letters is:
139/// - m
140/// Treat string as multiple lines. That is, change "^" and "$" from
141/// matching the start or end of the string to matching the start or
142/// end of any line anywhere within the string.
143/// - s
144/// Treat string as single line. That is, change "." to match any
145/// character whatsoever, even a newline, which normally it would not match.
146/// Used together, as /ms, they let the "." match any character whatsoever,
147/// while still allowing "^" and "$" to match, respectively, just after and
148/// just before newlines within the string.
149/// - i
150/// Do case-insensitive pattern matching.
151/// - x
152/// Extend your pattern's legibility by permitting whitespace and comments.
153/// - p
154/// Preserve the string matched such that ${^PREMATCH}, ${^MATCH},
155/// and ${^POSTMATCH} are available for use after matching.
156/// - g and c
157/// Global matching, and keep the Current position after failed matching.
158/// Unlike i, m, s and x, these two flags affect the way the regex is used
159/// rather than the regex itself. See Using regular expressions in Perl in
160/// perlretut for further explanation of the g and c modifiers.
161/// For more detail see: http://perldoc.perl.org/perlre.html#Modifiers.
162
164{
165 UInt_t opts = 0;
166
167 if (modStr.Length() <= 0)
168 return fPCREOpts;
169
170 //translate perl flags into pcre flags
171 const char *m = modStr;
172 while (*m) {
173 switch (*m) {
174 case 'g':
175 opts |= kPCRE_GLOBAL;
176 break;
177 case 'i':
178 opts |= PCRE_CASELESS;
179 break;
180 case 'm':
181 opts |= PCRE_MULTILINE;
182 break;
183 case 'o':
184 opts |= kPCRE_OPTIMIZE;
185 break;
186 case 's':
187 opts |= PCRE_DOTALL;
188 break;
189 case 'x':
190 opts |= PCRE_EXTENDED;
191 break;
192 case 'd': // special flag to enable debug printing (not Perl compat.)
193 opts |= kPCRE_DEBUG_MSGS;
194 break;
195 default:
196 Error("ParseMods", "illegal pattern modifier: %c", *m);
197 opts = 0;
198 }
199 ++m;
200 }
201 return opts;
202}
203
204////////////////////////////////////////////////////////////////////////////////
205/// Return PCRE modifier options as string.
206/// For meaning of mods see ParseMods().
207
209{
210 TString ret;
211
212 if (fPCREOpts & kPCRE_GLOBAL) ret += 'g';
213 if (fPCREOpts & PCRE_CASELESS) ret += 'i';
214 if (fPCREOpts & PCRE_MULTILINE) ret += 'm';
215 if (fPCREOpts & PCRE_DOTALL) ret += 's';
216 if (fPCREOpts & PCRE_EXTENDED) ret += 'x';
217 if (fPCREOpts & kPCRE_OPTIMIZE) ret += 'o';
218 if (fPCREOpts & kPCRE_DEBUG_MSGS) ret += 'd';
219
220 return ret;
221}
222
223////////////////////////////////////////////////////////////////////////////////
224/// Compile the fPattern.
225
227{
228#ifdef USE_PCRE2
229 if (fPriv->fPCRE)
230 pcre2_code_free(fPriv->fPCRE);
231#else
232 if (fPriv->fPCRE)
233 pcre_free(fPriv->fPCRE);
234#endif
235
237 Info("Compile", "PREGEX compiling %s", fPattern.Data());
238
239#ifdef USE_PCRE2
240 int errcode;
241 PCRE2_SIZE patIndex;
242 fPriv->fPCRE = pcre2_compile((PCRE2_SPTR)fPattern.Data(), fPattern.Length(),
244 &errcode, &patIndex, nullptr);
245#else
246 const char *errstr;
247 Int_t patIndex;
248 fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
249 &errstr, &patIndex, nullptr);
250#endif
251
252 if (!fPriv->fPCRE) {
253#ifdef USE_PCRE2
254 PCRE2_UCHAR errstr[256];
255 pcre2_get_error_message(errcode, errstr, 256);
256#endif
258 throw std::runtime_error
259 (TString::Format("TPRegexp::Compile() compilation of TPRegexp(%s) failed at: %d because %s",
260 fPattern.Data(), (int)patIndex, errstr).Data());
261 } else {
262 Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
263 fPattern.Data(), (int)patIndex, errstr);
264 return;
265 }
266 }
267
268#ifndef USE_PCRE2
270 Optimize();
271#endif
272}
273
274////////////////////////////////////////////////////////////////////////////////
275/// Send the pattern through the optimizer.
276
278{
279#ifndef USE_PCRE2
280 if (fPriv->fPCREExtra)
281 pcre_free(fPriv->fPCREExtra);
282
284 Info("Optimize", "PREGEX studying %s", fPattern.Data());
285
286 const char *errstr;
287 // pcre_study allows less options - see pcre_internal.h PUBLIC_STUDY_OPTIONS.
288 fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr);
289
290 if (!fPriv->fPCREExtra && errstr) {
291 Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
292 fPattern.Data(), errstr);
293 }
294#endif
295}
296
297////////////////////////////////////////////////////////////////////////////////
298/// Returns the number of expanded '$' constructs.
299
301 const TString &replacePattern,
302 Int_t *offVec, Int_t nrMatch) const
303{
304 Int_t nrSubs = 0;
305 const char *p = replacePattern;
306
307 Int_t state = 0;
308 Int_t subnum = 0;
309 while (state != -1) {
310 switch (state) {
311 case 0:
312 if (!*p) {
313 state = -1;
314 break;
315 }
316 if (*p == '$') {
317 state = 1;
318 subnum = 0;
319 if (p[1] == '&') {
320 p++;
321 if (isdigit(p[1]))
322 p++;
323 } else if (!isdigit(p[1])) {
324 Error("ReplaceSubs", "badly formed replacement pattern: %s",
325 replacePattern.Data());
326 }
327 } else
328 final += *p;
329 break;
330 case 1:
331 if (isdigit(*p)) {
332 subnum *= 10;
333 subnum += (*p)-'0';
334 } else {
336 Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
337 if (subnum < 0 || subnum > nrMatch-1) {
338 Error("ReplaceSubs","bad string number: %d",subnum);
339 } else {
340 const TString subStr = s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
341 final += subStr;
342 nrSubs++;
343 }
344 state = 0;
345 continue; // send char to start state
346 }
347 }
348 p++;
349 }
350 return nrSubs;
351}
352
353////////////////////////////////////////////////////////////////////////////////
354/// Perform the actual matching - protected method.
355
357 Int_t nMaxMatch, TArrayI *pos) const
358{
359 Int_t *offVec = new Int_t[3*nMaxMatch];
360
361#ifdef USE_PCRE2
362 pcre2_match_data *match_data;
363 match_data = pcre2_match_data_create_from_pattern(fPriv->fPCRE, nullptr);
364 Int_t nrMatch = pcre2_match(fPriv->fPCRE, (PCRE2_SPTR8)s.Data(),
365 s.Length(), start, 0,
366 match_data, nullptr);
367#else
368 // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
369 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
370 s.Length(), start, 0,
371 offVec, 3*nMaxMatch);
372#endif
373
374 if (nrMatch == PCRE_ERROR_NOMATCH)
375 nrMatch = 0;
376 else if (nrMatch <= 0) {
377 Error("Match","pcre_exec error = %d", nrMatch);
378#ifdef USE_PCRE2
379 pcre2_match_data_free(match_data);
380#endif
381 delete [] offVec;
382 return 0;
383 }
384
385 if (pos) {
386#ifdef USE_PCRE2
387 PCRE2_SIZE *oVec = pcre2_get_ovector_pointer(match_data);
388 for (int i = 0; i < 2 * nrMatch; ++i)
389 offVec[i] = oVec[i];
390#endif
391 pos->Set(2*nrMatch, offVec);
392 }
393
394#ifdef USE_PCRE2
395 pcre2_match_data_free(match_data);
396#endif
397 delete [] offVec;
398
399 return nrMatch;
400}
401
402////////////////////////////////////////////////////////////////////////////////
403/// The number of matches is returned, this equals the full match +
404/// sub-pattern matches.
405/// nMaxMatch is the maximum allowed number of matches.
406/// pos contains the string indices of the matches. Its usage is
407/// shown in the routine MatchS.
408/// For meaning of mods see ParseMods().
409
410Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start,
411 Int_t nMaxMatch, TArrayI *pos)
412{
413 UInt_t opts = ParseMods(mods);
414
415 if (!fPriv->fPCRE || opts != fPCREOpts) {
416 fPCREOpts = opts;
417 Compile();
418 }
419
420 return MatchInternal(s, start, nMaxMatch, pos);
421}
422
423
424////////////////////////////////////////////////////////////////////////////////
425/// Returns a TObjArray of matched substrings as TObjString's.
426/// The TObjArray is owner of the objects and must be deleted by the user.
427/// The first entry is the full matched pattern, followed by the sub-patterns.
428/// If a pattern was not matched, it will return an empty substring:
429/// ~~~ {.cpp}
430/// TObjArray *subStrL = TPRegexp("(a|(z))(bc)").MatchS("abc");
431/// for (Int_t i = 0; i < subStrL->GetLast()+1; i++) {
432/// const TString subStr = ((TObjString *)subStrL->At(i))->GetString();
433/// std::cout << "\"" << subStr << "\" ";
434/// }
435/// std::cout << std::endl;
436/// ~~~
437/// produces: "abc" "a" "" "bc"
438///
439/// For meaning of mods see ParseMods().
440
442 Int_t start, Int_t nMaxMatch)
443{
444 TArrayI pos;
445 Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);
446
447 TObjArray *subStrL = new TObjArray();
448 subStrL->SetOwner();
449
450 for (Int_t i = 0; i < nrMatch; i++) {
451 Int_t startp = pos[2*i];
452 Int_t stopp = pos[2*i+1];
453 if (startp >= 0 && stopp >= 0) {
454 const TString subStr = s(pos[2*i], pos[2*i+1]-pos[2*i]);
455 subStrL->Add(new TObjString(subStr));
456 } else
457 subStrL->Add(new TObjString());
458 }
459
460 return subStrL;
461}
462
463////////////////////////////////////////////////////////////////////////////////
464/// Perform pattern substitution with optional back-ref replacement
465/// - protected method.
466
468 Int_t start, Int_t nMaxMatch,
469 Bool_t doDollarSubst) const
470{
471 Int_t *offVec = new Int_t[3*nMaxMatch];
472
473 TString fin;
474 Int_t nrSubs = 0;
475 Int_t offset = start;
476 Int_t last = 0;
477
478#ifdef USE_PCRE2
479 pcre2_match_data *match_data;
480 match_data = pcre2_match_data_create_from_pattern(fPriv->fPCRE, nullptr);
481#endif
482
483 while (kTRUE) {
484
485 // find next matching subs
486 // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
487#ifdef USE_PCRE2
488 Int_t nrMatch = pcre2_match(fPriv->fPCRE, (PCRE2_SPTR)s.Data(),
489 s.Length(), offset, 0,
490 match_data, nullptr);
491#else
492 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
493 s.Length(), offset, 0,
494 offVec, 3*nMaxMatch);
495#endif
496
497 if (nrMatch == PCRE_ERROR_NOMATCH) {
498 break;
499 } else if (nrMatch <= 0) {
500 Error("Substitute", "pcre_exec error = %d", nrMatch);
501 break;
502 }
503
504#ifdef USE_PCRE2
505 PCRE2_SIZE *oVec = pcre2_get_ovector_pointer(match_data);
506 for (int i = 0; i < 2 * nrMatch; ++i)
507 offVec[i] = oVec[i];
508#endif
509
510 // append anything previously unmatched, but not substituted
511 if (last <= offVec[0]) {
512 fin += s(last,offVec[0]-last);
513 last = offVec[1];
514 }
515
516 // replace stuff in s
517 if (doDollarSubst) {
518 ReplaceSubs(s, fin, replacePattern, offVec, nrMatch);
519 } else {
520 fin += replacePattern;
521 }
522 ++nrSubs;
523
524 // if global gotta check match at every pos
525 if (!(fPCREOpts & kPCRE_GLOBAL))
526 break;
527
528 if (offVec[0] != offVec[1]) {
529 offset = offVec[1];
530 } else {
531 // matched empty string
532 if (offVec[1] == s.Length()) break;
533 offset = offVec[1]+1;
534 }
535 }
536
537#ifdef USE_PCRE2
538 pcre2_match_data_free(match_data);
539#endif
540 delete [] offVec;
541
542 fin += s(last,s.Length()-last);
543 s = fin;
544
545 return nrSubs;
546}
547
548////////////////////////////////////////////////////////////////////////////////
549/// Substitute replaces the string s by a new string in which matching
550/// patterns are replaced by the replacePattern string. The number of
551/// substitutions are returned.
552/// ~~~ {.cpp}
553/// TString s("aap noot mies");
554/// const Int_t nrSub = TPRegexp("(\\w*) noot (\\w*)").Substitute(s,"$2 noot $1");
555/// std::cout << nrSub << " \"" << s << "\"" <<std::endl;
556/// ~~~
557/// produces: 1 "mies noot aap"
558///
559/// For meaning of mods see ParseMods().
560
561Int_t TPRegexp::Substitute(TString &s, const TString &replacePattern,
562 const TString &mods, Int_t start, Int_t nMaxMatch)
563{
564 UInt_t opts = ParseMods(mods);
565
566 if (!fPriv->fPCRE || opts != fPCREOpts) {
567 fPCREOpts = opts;
568 Compile();
569 }
570
571 return SubstituteInternal(s, replacePattern, start, nMaxMatch, kTRUE);
572}
573
574
575////////////////////////////////////////////////////////////////////////////////
576/// Returns true if underlying PCRE structure has been successfully
577/// generated via regexp compilation.
578
580{
581 return fPriv->fPCRE != nullptr;
582}
583
584////////////////////////////////////////////////////////////////////////////////
585/// Get value of static flag controlling whether exception should be thrown upon an
586/// error during regular expression compilation by the PCRE engine.
587
589{
591}
592
593////////////////////////////////////////////////////////////////////////////////
594/// Set static flag controlling whether exception should be thrown upon an
595/// error during regular expression compilation by the PCRE engine.
596
598{
599 fgThrowAtCompileError = throwp;
600}
601
602////////////////////////////////////////////////////////////////////////////////
603// //
604// TString member functions, put here so the linker will include //
605// them only if regular expressions are used. //
606// //
607////////////////////////////////////////////////////////////////////////////////
608
609////////////////////////////////////////////////////////////////////////////////
610/// Find the first occurrence of the regexp in string and return the position.
611/// Start is the offset at which the search should start.
612
614{
615 TArrayI pos;
616 Int_t nrMatch = r.Match(*this,"",start,10,&pos);
617 if (nrMatch > 0)
618 return pos[0];
619 else
620 return -1;
621}
622
623////////////////////////////////////////////////////////////////////////////////
624/// Find the first occurrence of the regexp in string and return the position.
625/// Extent is length of the matched string and start is the offset at which
626/// the matching should start.
627
629{
630 TArrayI pos;
631 const Int_t nrMatch = r.Match(*this,"",start,10,&pos);
632 if (nrMatch > 0) {
633 *extent = pos[1]-pos[0];
634 return pos[0];
635 } else {
636 *extent = 0;
637 return -1;
638 }
639}
640
641////////////////////////////////////////////////////////////////////////////////
642/// Return the substring found by applying the regexp starting at start.
643
645{
646 Ssiz_t len;
647 Ssiz_t begin = Index(r, &len, start);
648 return TSubString(*this, begin, len);
649}
650
651////////////////////////////////////////////////////////////////////////////////
652/// Return the substring found by applying the regexp.
653
655{
656 return (*this)(r, 0);
657}
658
659
660/** \class TPMERegexp
661
662Wrapper for PCRE library (Perl Compatible Regular Expressions).
663Based on PME - PCRE Made Easy by Zachary Hansen.
664
665Supports main Perl operations using regular expressions (Match,
666Substitute and Split). To retrieve the results one can simply use
667operator[] returning a TString.
668
669See $ROOTSYS/tutorials/regexp_pme.C for examples.
670*/
671
673
674////////////////////////////////////////////////////////////////////////////////
675/// Default constructor. This regexp will match an empty string.
676
678 TPRegexp(),
679 fNMaxMatches(10),
680 fNMatches(0),
681 fAddressOfLastString(nullptr),
682 fLastGlobalPosition(0)
683{
684 Compile();
685}
686
687////////////////////////////////////////////////////////////////////////////////
688/// Constructor.
689///
690/// \param[in] s string to compile into regular expression
691/// \param[in] opts perl-style character flags to be set on TPME object
692/// \param[in] nMatchMax maximum number of matches
693
694TPMERegexp::TPMERegexp(const TString& s, const TString& opts, Int_t nMatchMax) :
695 TPRegexp(s),
696 fNMaxMatches(nMatchMax),
697 fNMatches(0),
698 fAddressOfLastString(nullptr),
699 fLastGlobalPosition(0)
700{
701 fPCREOpts = ParseMods(opts);
702 Compile();
703}
704
705////////////////////////////////////////////////////////////////////////////////
706/// Constructor.
707///
708/// \param[in] s string to compile into regular expression
709/// \param[in] opts PCRE-style option flags to be set on TPME object
710/// \param[in] nMatchMax maximum number of matches
711
712TPMERegexp::TPMERegexp(const TString& s, UInt_t opts, Int_t nMatchMax) :
713 TPRegexp(s),
714 fNMaxMatches(nMatchMax),
715 fNMatches(0),
716 fAddressOfLastString(nullptr),
717 fLastGlobalPosition(0)
718{
719 fPCREOpts = opts;
720 Compile();
721}
722
723////////////////////////////////////////////////////////////////////////////////
724/// Copy constructor.
725/// Only PCRE specifics are copied, not last-match or global-match
726/// information.
727
729 TPRegexp(r),
730 fNMaxMatches(r.fNMaxMatches),
731 fNMatches(0),
732 fAddressOfLastString(nullptr),
733 fLastGlobalPosition(0)
734{
735 Compile();
736}
737
738////////////////////////////////////////////////////////////////////////////////
739/// Reset the pattern and options.
740/// If 'nMatchMax' other than -1 (the default) is passed, it is also set.
741
742void TPMERegexp::Reset(const TString& s, const TString& opts, Int_t nMatchMax)
743{
744 Reset(s, ParseMods(opts), nMatchMax);
745}
746
747////////////////////////////////////////////////////////////////////////////////
748/// Reset the pattern and options.
749/// If 'nMatchMax' other than -1 (the default) is passed, it is also set.
750
751void TPMERegexp::Reset(const TString& s, UInt_t opts, Int_t nMatchMax)
752{
753 fPattern = s;
754 fPCREOpts = opts;
755 Compile();
756
757 if (nMatchMax != -1)
758 fNMatches = nMatchMax;
759 fNMatches = 0;
761}
762
763////////////////////////////////////////////////////////////////////////////////
764/// Copy global-match state from 're' so that this regexp can continue
765/// parsing the string from where 're' left off.
766///
767/// Alternatively, GetGlobalPosition() can be used to retrieve the
768/// last match position so that it can be passed to Match().
769///
770/// Ideally, as it is done in PERL, the last match position would be
771/// stored in the TString itself.
772
774{
777}
778
779////////////////////////////////////////////////////////////////////////////////
780/// Reset state of global match.
781/// This happens automatically when a new string is passed for matching.
782/// But be careful, as the address of the last TString object is used
783/// to make this decision.
784
786{
788}
789
790////////////////////////////////////////////////////////////////////////////////
791/// Runs a match on s against the regex 'this' was created with.
792///
793/// \param[in] s string to match against
794/// \param[in] start offset at which to start matching
795/// \return number of matches found
796
798{
799 // If we got a new string, reset the global position counter.
800 if (fAddressOfLastString != (void*) &s) {
802 }
803
804 if (fPCREOpts & kPCRE_GLOBAL) {
805 start += fLastGlobalPosition;
806 }
807
808 //fprintf(stderr, "string: '%s' length: %d offset: %d\n", s.Data(), s.length(), offset);
810
811 //fprintf(stderr, "MatchInternal_exec result = %d\n", fNMatches);
812
814 fAddressOfLastString = (void*) &s;
815
816 if (fPCREOpts & kPCRE_GLOBAL) {
817 if (fNMatches == PCRE_ERROR_NOMATCH) {
818 // fprintf(stderr, "TPME RESETTING: reset for no match\n");
819 fLastGlobalPosition = 0; // reset the position for next match (perl does this)
820 } else if (fNMatches > 0) {
821 // fprintf(stderr, "TPME RESETTING: setting to %d\n", marks[0].second);
822 fLastGlobalPosition = fMarkers[1]; // set to the end of the match
823 } else {
824 // fprintf(stderr, "TPME RESETTING: reset for no unknown\n");
826 }
827 }
828
829 return fNMatches;
830}
831
832////////////////////////////////////////////////////////////////////////////////
833/// Splits into at most maxfields. If maxfields is unspecified or
834/// 0, trailing empty matches are discarded. If maxfields is
835/// positive, no more than maxfields fields will be returned and
836/// trailing empty matches are preserved. If maxfields is negative,
837/// all fields (including trailing empty ones) are returned. This
838/// *should* be the same as the perl behaviour.
839///
840/// If pattern produces sub-matches, these are also stored in
841/// the result.
842///
843/// A pattern matching the null string will split the value of EXPR
844/// into separate characters at each point it matches that way.
845///
846/// \param[in] s string to split
847/// \param[in] maxfields maximum number of fields to be split out. 0 means
848/// split all fields, but discard any trailing empty bits.
849/// Negative means split all fields and keep trailing empty bits.
850/// Positive means keep up to N fields including any empty fields
851/// less than N. Anything remaining is in the last field.
852/// \return number of fields found
853
855{
856 typedef std::pair<int, int> MarkerLoc_t;
857 typedef std::vector<MarkerLoc_t> MarkerLocVec_t;
858
859 // stores the marks for the split
860 MarkerLocVec_t oMarks;
861
862 // this is a list of current trailing empty matches if maxfields is
863 // unspecified or 0. If there is stuff in it and a non-empty match
864 // is found, then everything in here is pushed into oMarks and then
865 // the new match is pushed on. If the end of the string is reached
866 // and there are empty matches in here, they are discarded.
867 MarkerLocVec_t oCurrentTrailingEmpties;
868
869 Int_t nOffset = 0;
870 Int_t nMatchesFound = 0;
871
872 // while we are still finding matches and maxfields is 0 or negative
873 // (meaning we get all matches), or we haven't gotten to the number
874 // of specified matches
875 Int_t matchRes;
876 while ((matchRes = Match(s, nOffset)) &&
877 ((maxfields < 1) || nMatchesFound < maxfields)) {
878 ++nMatchesFound;
879
880 if (fMarkers[1] - fMarkers[0] == 0) {
881 oMarks.push_back(MarkerLoc_t(nOffset, nOffset + 1));
882 ++nOffset;
883 if (nOffset >= s.Length())
884 break;
885 else
886 continue;
887 }
888
889 // match can be empty
890 if (nOffset != fMarkers[0]) {
891 if (!oCurrentTrailingEmpties.empty()) {
892 oMarks.insert(oMarks.end(),
893 oCurrentTrailingEmpties.begin(),
894 oCurrentTrailingEmpties.end());
895 oCurrentTrailingEmpties.clear();
896 }
897 oMarks.push_back(MarkerLoc_t(nOffset, fMarkers[0]));
898 } else {
899 // empty match
900 if (maxfields == 0) {
901 // store for possible later inclusion
902 oCurrentTrailingEmpties.push_back(MarkerLoc_t(nOffset, nOffset));
903 } else {
904 oMarks.push_back(MarkerLoc_t(nOffset, nOffset));
905 }
906 }
907
908 nOffset = fMarkers[1];
909
910 if (matchRes > 1) {
911 for (Int_t i = 1; i < matchRes; ++i)
912 oMarks.push_back(MarkerLoc_t(fMarkers[2*i], fMarkers[2*i + 1]));
913 }
914 }
915
916
917 // if there were no matches found, push the whole thing on
918 if (nMatchesFound == 0) {
919 oMarks.push_back(MarkerLoc_t(0, s.Length()));
920 }
921 // if we ran out of matches, then append the rest of the string
922 // onto the end of the last split field
923 else if (maxfields > 0 && nMatchesFound >= maxfields) {
924 oMarks[oMarks.size() - 1].second = s.Length();
925 }
926 // else we have to add another entry for the end of the string
927 else {
928 Bool_t last_empty = (nOffset == s.Length());
929 if (!last_empty || maxfields < 0) {
930 if (!oCurrentTrailingEmpties.empty()) {
931 oMarks.insert(oMarks.end(),
932 oCurrentTrailingEmpties.begin(),
933 oCurrentTrailingEmpties.end());
934 }
935 oMarks.push_back(MarkerLoc_t(nOffset, s.Length()));
936 }
937 }
938
939 fNMatches = oMarks.size();
941 for (Int_t i = 0; i < fNMatches; ++i) {
942 fMarkers[2*i] = oMarks[i].first;
943 fMarkers[2*i + 1] = oMarks[i].second;
944 }
945
946 // fprintf(stderr, "match returning %d\n", fNMatches);
947 return fNMatches;
948}
949
950////////////////////////////////////////////////////////////////////////////////
951/// Substitute matching part of s with r, dollar back-ref
952/// substitution is performed if doDollarSubst is true (default).
953/// Returns the number of substitutions made.
954///
955/// After the substitution, another pass is made over the resulting
956/// string and the following special tokens are interpreted:
957/// - `\l` lowercase next char,
958/// - `\u` uppercase next char,
959/// - `\L` lowercase till `\E`,
960/// - `\U` uppercase till `\E`, and
961/// - `\E` end case modification.
962
964{
965 Int_t cnt = SubstituteInternal(s, r, 0, fNMaxMatches, doDollarSubst);
966
967 TString ret;
968 Int_t state = 0;
969 Ssiz_t pos = 0, len = s.Length();
970 const Char_t *data = s.Data();
971 while (pos < len) {
972 Char_t c = data[pos];
973 if (c == '\\') {
974 c = data[pos+1]; // Rely on string-data being null-terminated.
975 switch (c) {
976 case 0 : ret += '\\'; break;
977 case 'l': state = 1; break;
978 case 'u': state = 2; break;
979 case 'L': state = 3; break;
980 case 'U': state = 4; break;
981 case 'E': state = 0; break;
982 default : ret += '\\'; ret += c; break;
983 }
984 pos += 2;
985 } else {
986 switch (state) {
987 case 0: ret += c; break;
988 case 1: ret += (Char_t) tolower(c); state = 0; break;
989 case 2: ret += (Char_t) toupper(c); state = 0; break;
990 case 3: ret += (Char_t) tolower(c); break;
991 case 4: ret += (Char_t) toupper(c); break;
992 default: Error("TPMERegexp::Substitute", "invalid state.");
993 }
994 ++pos;
995 }
996 }
997
998 s = ret;
999
1000 return cnt;
1001}
1002
1003////////////////////////////////////////////////////////////////////////////////
1004/// Returns the sub-string from the internal fMarkers vector.
1005/// Requires having run match or split first.
1006
1008{
1009 if (index >= fNMatches)
1010 return "";
1011
1012 Int_t begin = fMarkers[2*index];
1013 Int_t end = fMarkers[2*index + 1];
1014 return fLastStringMatched(begin, end-begin);
1015}
1016
1017////////////////////////////////////////////////////////////////////////////////
1018/// Print the regular expression and modifier options.
1019/// If 'option' contains "all", prints also last string match and
1020/// match results.
1021
1023{
1024 TString opt = option;
1025 opt.ToLower();
1026
1027 Printf("Regexp='%s', Opts='%s'", fPattern.Data(), GetModifiers().Data());
1028 if (opt.Contains("all")) {
1029 Printf(" last string='%s'", fLastStringMatched.Data());
1030 Printf(" number of matches = %d", fNMatches);
1031 for (Int_t i=0; i<fNMatches; ++i)
1032 Printf(" %d - %s", i, operator[](i).Data());
1033 }
1034}
1035
1036
1037/** \class TStringToken
1038Provides iteration through tokens of a given string.
1039
1040 - fFullStr stores the string to be split. It is never modified.
1041 - fSplitRe is the perl-re that is used to separate the tokens.
1042 - fReturnVoid if true, empty strings will be returned.
1043
1044Current token is stored in the TString base-class.
1045During construction no match is done, use NextToken() to get the first
1046and all subsequent tokens.
1047*/
1048
1050
1051////////////////////////////////////////////////////////////////////////////////
1052/// Constructor.
1053
1054TStringToken::TStringToken(const TString& fullStr, const TString& splitRe, Bool_t retVoid) :
1055 fFullStr (fullStr),
1056 fSplitRe (splitRe),
1057 fReturnVoid (retVoid),
1058 fPos (0)
1059{
1060}
1061
1062////////////////////////////////////////////////////////////////////////////////
1063/// Get the next token, it is stored in this TString.
1064/// Returns true if new token is available, false otherwise.
1065
1067{
1068 TArrayI x;
1069 while (fPos < fFullStr.Length()) {
1070 if (fSplitRe.Match(fFullStr, "", fPos, 2, &x)) {
1072 fPos = x[1];
1073 } else {
1075 fPos = fFullStr.Length() + 1;
1076 }
1077 if (Length() || fReturnVoid)
1078 return kTRUE;
1079 }
1080
1081 // Special case: void-strings are requested and the full-string
1082 // ends with the separator. Thus we return another empty string.
1083 if (fPos == fFullStr.Length() && fReturnVoid) {
1085 fPos = fFullStr.Length() + 1;
1086 return kTRUE;
1087 }
1088
1089 return kFALSE;
1090}
#define c(i)
Definition RSha256.hxx:101
bool Bool_t
Definition RtypesCore.h:63
char Char_t
Definition RtypesCore.h:37
constexpr Bool_t kFALSE
Definition RtypesCore.h:101
constexpr Bool_t kTRUE
Definition RtypesCore.h:100
const char Option_t
Definition RtypesCore.h:66
#define ClassImp(name)
Definition Rtypes.h:377
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
Definition TError.cxx:218
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:185
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t option
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
void Printf(const char *fmt,...)
Formats a string in a circular formatting buffer and prints the string.
Definition TString.cxx:2503
Array of integers (32 bits per element).
Definition TArrayI.h:27
void Set(Int_t n) override
Set size of this array to n ints.
Definition TArrayI.cxx:105
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
An array of TObjects.
Definition TObjArray.h:31
void Add(TObject *obj) override
Definition TObjArray.h:68
Collectable string class.
Definition TObjString.h:28
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Definition TPRegexp.h:97
Int_t fLastGlobalPosition
Definition TPRegexp.h:110
void ResetGlobalState()
Reset state of global match.
Definition TPRegexp.cxx:785
Int_t fNMatches
Definition TPRegexp.h:104
void * fAddressOfLastString
Definition TPRegexp.h:108
virtual void Print(Option_t *option="")
Print the regular expression and modifier options.
Int_t Split(const TString &s, Int_t maxfields=0)
Splits into at most maxfields.
Definition TPRegexp.cxx:854
TPMERegexp()
Default constructor. This regexp will match an empty string.
Definition TPRegexp.cxx:677
Int_t Substitute(TString &s, const TString &r, Bool_t doDollarSubst=kTRUE)
Substitute matching part of s with r, dollar back-ref substitution is performed if doDollarSubst is t...
Definition TPRegexp.cxx:963
TString operator[](Int_t)
Returns the sub-string from the internal fMarkers vector.
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Definition TPRegexp.cxx:797
void Reset(const TString &s, const TString &opts="", Int_t nMatchMax=-1)
Reset the pattern and options.
Definition TPRegexp.cxx:742
Int_t fNMaxMatches
Definition TPRegexp.h:103
TArrayI fMarkers
Definition TPRegexp.h:105
TString fLastStringMatched
Definition TPRegexp.h:107
void AssignGlobalState(const TPMERegexp &re)
Copy global-match state from 're' so that this regexp can continue parsing the string from where 're'...
Definition TPRegexp.cxx:773
TPRegexp()
Default ctor.
Definition TPRegexp.cxx:68
void Compile()
Compile the fPattern.
Definition TPRegexp.cxx:226
Int_t SubstituteInternal(TString &s, const TString &replace, Int_t start, Int_t nMaxMatch0, Bool_t doDollarSubst) const
Perform pattern substitution with optional back-ref replacement.
Definition TPRegexp.cxx:467
Bool_t IsValid() const
Returns true if underlying PCRE structure has been successfully generated via regexp compilation.
Definition TPRegexp.cxx:579
TString fPattern
Definition TPRegexp.h:46
TPRegexp & operator=(const TPRegexp &p)
Assignment operator.
Definition TPRegexp.cxx:114
UInt_t ParseMods(const TString &mods) const
Translate Perl modifier flags into pcre flags.
Definition TPRegexp.cxx:163
UInt_t fPCREOpts
Definition TPRegexp.h:48
PCREPriv_t * fPriv
Definition TPRegexp.h:47
Int_t Match(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10, TArrayI *pos=nullptr)
The number of matches is returned; this equals the full match plus the sub-pattern matches.
Definition TPRegexp.cxx:410
TObjArray * MatchS(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Returns a TObjArray of matched substrings as TObjString's.
Definition TPRegexp.cxx:441
static Bool_t fgThrowAtCompileError
Definition TPRegexp.h:50
Int_t ReplaceSubs(const TString &s, TString &final, const TString &replacePattern, Int_t *ovec, Int_t nmatch) const
Returns the number of expanded '$' constructs.
Definition TPRegexp.cxx:300
virtual ~TPRegexp()
Cleanup.
Definition TPRegexp.cxx:97
Int_t Substitute(TString &s, const TString &replace, const TString &mods="", Int_t start=0, Int_t nMatchMax=10)
Substitute replaces the string s by a new string in which matching patterns are replaced by the repla...
Definition TPRegexp.cxx:561
Int_t MatchInternal(const TString &s, Int_t start, Int_t nMaxMatch, TArrayI *pos=nullptr) const
Perform the actual matching - protected method.
Definition TPRegexp.cxx:356
TString GetModifiers() const
Return PCRE modifier options as string.
Definition TPRegexp.cxx:208
static Bool_t GetThrowAtCompileError()
Get value of static flag controlling whether exception should be thrown upon an error during regular ...
Definition TPRegexp.cxx:588
void Optimize()
Send the pattern through the optimizer.
Definition TPRegexp.cxx:277
@ kPCRE_GLOBAL
Definition TPRegexp.h:40
@ kPCRE_OPTIMIZE
Definition TPRegexp.h:41
@ kPCRE_DEBUG_MSGS
Definition TPRegexp.h:42
@ kPCRE_INTMASK
Definition TPRegexp.h:43
static void SetThrowAtCompileError(Bool_t throwp)
Set static flag controlling whether exception should be thrown upon an error during regular expressio...
Definition TPRegexp.cxx:597
Provides iteration through tokens of a given string.
Definition TPRegexp.h:143
TStringToken(const TString &fullStr, const TString &splitRe, Bool_t retVoid=kFALSE)
Constructor.
TPRegexp fSplitRe
Definition TPRegexp.h:147
const TString fFullStr
Definition TPRegexp.h:146
Bool_t NextToken()
Get the next token; it is stored in this TString.
Bool_t fReturnVoid
Definition TPRegexp.h:148
Basic string class.
Definition TString.h:139
Ssiz_t Length() const
Definition TString.h:417
friend class TSubString
Definition TString.h:142
char & operator()(Ssiz_t i)
Definition TString.h:724
void ToLower()
Change string to lower-case.
Definition TString.cxx:1182
const char * Data() const
Definition TString.h:376
TString & operator=(char s)
Assign character s to TString.
Definition TString.cxx:301
static TString Format(const char *fmt,...)
Static method which formats a string using a printf-style format descriptor and returns a TString.
Definition TString.cxx:2378
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:632
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:651
A zero length substring is legal.
Definition TString.h:85
Double_t x[n]
Definition legend1.C:17
pcre_extra * fPCREExtra
Definition TPRegexp.cxx:55
pcre * fPCRE
Definition TPRegexp.cxx:54
TMarker m
Definition textangle.C:8