Logo ROOT  
Reference Guide
TRegexp.cxx
Go to the documentation of this file.
1// @(#)root/base:$Id$
2// Author: Fons Rademakers 04/08/95
3
4/*************************************************************************
5 * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TRegexp
13\ingroup Base
14
15Regular expression class.
16
17~~~ {.cpp}
18 '^' // start-of-line anchor
19 '$' // end-of-line anchor
20 '.' // matches any character
21 '[' // start a character class
22 ']' // end a character class
23 '^' // negates character class if 1st character
24 '*' // Kleene closure (matches 0 or more)
25 '+' // Positive closure (1 or more)
26 '?' // Optional closure (0 or 1)
27~~~
28Note that the '|' operator (union) is not supported, nor are
29parentheses (grouping). Therefore "a|b" does not match "a".
30
31Standard classes like [:alnum:], [:alpha:], etc. are not supported,
32only [a-zA-Z], [^ntf] and so on.
33*/
34
35#include "TRegexp.h"
36#include "TString.h"
37#include "TError.h"
38#include "ThreadLocalStorage.h"
39
40const unsigned TRegexp::fgMaxpat = 2048;
41
42
44
45////////////////////////////////////////////////////////////////////////////////
46/// Create a regular expression from the input string. If wildcard is
47/// true then the input string will first be interpreted as a wildcard
48/// expression by MakeWildcard(), and the result then interpreted as a
49/// regular expression.
50
51TRegexp::TRegexp(const char *re, Bool_t wildcard)
52{
53 if (wildcard)
55 else
56 GenPattern(re);
57}
58
59////////////////////////////////////////////////////////////////////////////////
60/// Create a regular expression from a TString.
61
63{
64 GenPattern(re.Data());
65}
66
67////////////////////////////////////////////////////////////////////////////////
68/// Copy ctor.
69
71{
73}
74
75////////////////////////////////////////////////////////////////////////////////
76/// Destructor.
77
79{
80 delete [] fPattern;
81}
82
83////////////////////////////////////////////////////////////////////////////////
84/// Assignment operator.
85
87{
88 if (this != &r) {
89 delete [] fPattern;
91 }
92 return *this;
93}
94
95////////////////////////////////////////////////////////////////////////////////
96/// Assignment operator taking a char* and assigning it to a regexp.
97
98TRegexp& TRegexp::operator=(const char *str)
99{
100 delete [] fPattern;
101 GenPattern(str);
102 return *this;
103}
104
105////////////////////////////////////////////////////////////////////////////////
106/// Assignment operator taking a TString.
107
109{
110 delete [] fPattern;
111 GenPattern(str.Data());
112 return *this;
113}
114
115////////////////////////////////////////////////////////////////////////////////
116/// Generate the regular expression pattern.
117
118void TRegexp::GenPattern(const char *str)
119{
121 int error = ::Makepat(str, fPattern, fgMaxpat);
122 fStat = (error < 3) ? (EStatVal) error : kToolong;
123}
124
125////////////////////////////////////////////////////////////////////////////////
126/// Copy the regular expression pattern.
127
129{
131 memcpy(fPattern, r.fPattern, fgMaxpat * sizeof(Pattern_t));
132 fStat = r.fStat;
133}
134
135////////////////////////////////////////////////////////////////////////////////
136/// This routine transforms a wildcarding regular expression into
137/// a general regular expression used for pattern matching.
138/// When using wildcards the regular expression is assumed to be
139/// preceded by a "^" (BOL) and terminated by a "$" (EOL). Also, all
140/// "*"'s and "?"'s (closures) are assumed to be preceded by a "." (i.e. any
141/// character, except "/"'s) and all .'s are escaped (so *.ps is different
142/// from *.eps). The special treatment of "/" allows the easy matching of
143/// pathnames, e.g. "*.root" will match "aap.root", but not "pipo/aap.root".
144
145const char *TRegexp::MakeWildcard(const char *re)
146{
147 TTHREAD_TLS_ARRAY(char,fgMaxpat,buf);
148 char *s = buf;
149 if (!re) return "";
150 int len = strlen(re);
151 int slen = 0;
152
153 if (!len) return "";
154
155 for (int i = 0; i < len; i++) {
156 if ((unsigned)slen > fgMaxpat - 10) {
157 Error("MakeWildcard", "regexp too large");
158 break;
159 }
160 if (i == 0 && re[i] != '^') {
161 *s++ = '^';
162 slen++;
163 }
164 if (re[i] == '*') {
165#ifndef R__WIN32
166 //const char *wc = "[a-zA-Z0-9-+_\\.,: []<>]";
167 const char *wc = "[^/]";
168#else
169 //const char *wc = "[a-zA-Z0-9-+_., []<>]";
170 const char *wc = "[^\\/:]";
171#endif
172 strcpy(s, wc);
173 s += strlen(wc);
174 slen += strlen(wc);
175 }
176 if (re[i] == '.') {
177 *s++ = '\\';
178 slen++;
179 }
180 if (re[i] == '?') {
181#ifndef R__WIN32
182 //const char *wc = "[a-zA-Z0-9-+_\\.,: []<>]";
183 const char *wc = "[^/]";
184#else
185 //const char *wc = "[a-zA-Z0-9-+_., []<>]";
186 const char *wc = "[^\\/:]";
187#endif
188 strcpy(s, wc);
189 s += strlen(wc);
190 slen += strlen(wc);
191 } else {
192 *s++ = re[i];
193 slen++;
194 }
195 if (i == len-1 && re[i] != '$') {
196 *s++ = '$';
197 slen++;
198 }
199 }
200 *s = '\0';
201 return buf;
202}
203
204////////////////////////////////////////////////////////////////////////////////
205/// Find the first occurrence of the regexp in string and return the
206/// position, or -1 if there is no match. Len is length of the matched
207/// string and i is the offset at which the matching should start.
208
209Ssiz_t TRegexp::Index(const TString& string, Ssiz_t* len, Ssiz_t i) const
210{
211 if (fStat != kOK)
212 Error("TRegexp::Index", "Bad Regular Expression");
213
214 const char* startp;
215 const char* s = string.Data();
216 Ssiz_t slen = string.Length();
217 if (slen < i) return kNPOS;
218 const char* endp = ::Matchs(s+i, slen-i, fPattern, &startp);
219 if (endp) {
220 *len = endp - startp;
221 return startp - s;
222 } else {
223 *len = 0;
224 return kNPOS;
225 }
226}
227
228////////////////////////////////////////////////////////////////////////////////
229/// Check status of regexp.
230
232{
233 EStatVal temp = fStat;
234 fStat = kOK;
235 return temp;
236}
237
238////////////////////////////////////////////////////////////////////////////////
239// //
240// TString member functions, put here so the linker will include //
241// them only if regular expressions are used. //
242// //
243////////////////////////////////////////////////////////////////////////////////
244
245////////////////////////////////////////////////////////////////////////////////
246/// Find the first occurrence of the regexp in string and return the
247/// position, or -1 if there is no match. Start is the offset at which
248/// the search should start.
249
251{
252 Ssiz_t len;
253 return r.Index(*this, &len, start); // len not used
254}
255
256////////////////////////////////////////////////////////////////////////////////
257/// Find the first occurrence of the regexp in string and return the
258/// position, or -1 if there is no match. Extent is length of the matched
259/// string and start is the offset at which the matching should start.
260
261Ssiz_t TString::Index(const TRegexp& r, Ssiz_t* extent, Ssiz_t start) const
262{
263 return r.Index(*this, extent, start);
264}
265
266////////////////////////////////////////////////////////////////////////////////
267/// Return the substring found by applying the regexp starting at start.
268
270{
271 Ssiz_t len;
272 Ssiz_t begin = Index(r, &len, start);
273 return TSubString(*this, begin, len);
274}
275
276////////////////////////////////////////////////////////////////////////////////
277/// Return the substring found by applying the regexp.
278
280{
281 return (*this)(r,0);
282}
283
284////////////////////////////////////////////////////////////////////////////////
285/// Search for tokens delimited by regular expression 'delim' (default " ")
286/// in this string; search starts at 'from' and the token is returned in 'tok'.
287/// Returns in 'from' the next position after the delimiter.
288/// Returns kTRUE if a token is found, kFALSE if not or if some inconsistency
289/// occurred.
290/// This method allows to loop over tokens in this way:
291/// ~~~ {.cpp}
292/// TString myl = "tok1 tok2|tok3";
293/// TString tok;
294/// Ssiz_t from = 0;
295/// while (myl.Tokenize(tok, from, "[ |]")) {
296/// // Analyse tok
297/// ...
298/// }
299/// ~~~
300/// more convenient of the other Tokenize method when saving the tokens is not
301/// needed.
302
303Bool_t TString::Tokenize(TString &tok, Ssiz_t &from, const char *delim) const
304{
305 Bool_t found = kFALSE;
306
307 // Reset the token
308 tok = "";
309
310 // Make sure inputs make sense
311 Int_t len = Length();
312 if (len <= 0 || from > (len - 1) || from < 0)
313 return found;
314
315 // Ensure backward compatibility to allow one or more times the delimiting character
316 TString rdelim(delim);
317 if(rdelim.Length() == 1) {
318 rdelim = "[" + rdelim + "]+";
319 }
320 TRegexp rg(rdelim);
321
322 // Find delimiter
323 Int_t ext = 0;
324 Int_t pos = Index(rg, &ext, from);
325
326 // Assign to token
327 if (pos == kNPOS || pos > from) {
328 Ssiz_t last = (pos != kNPOS) ? (pos - 1) : len;
329 tok = (*this)(from, last-from+1);
330 }
331 found = kTRUE;
332
333 // Update start-of-search index
334 from = pos + ext;
335 if (pos == kNPOS) {
336 from = pos;
337 if (tok.IsNull()) {
338 // Empty, last token
339 found = kFALSE;
340 }
341 }
342 // Make sure that 'from' has a meaningful value
343 from = (from < len) ? from : len;
344
345 // Done
346 return found;
347}
unsigned short Pattern_t
Definition: Match.h:26
int Makepat(const char *, Pattern_t *, int)
Make a pattern template from the string pointed to by exp.
Definition: Match.cxx:129
const char * Matchs(const char *, size_t len, const Pattern_t *, const char **)
Match a string with a pattern.
Definition: Match.cxx:220
ROOT::R::TRInterface & r
Definition: Object.C:4
const Ssiz_t kNPOS
Definition: RtypesCore.h:113
const Bool_t kFALSE
Definition: RtypesCore.h:90
const Bool_t kTRUE
Definition: RtypesCore.h:89
#define ClassImp(name)
Definition: Rtypes.h:361
void Error(const char *location, const char *msgfmt,...)
Regular expression class.
Definition: TRegexp.h:31
Pattern_t * fPattern
Definition: TRegexp.h:37
EStatVal Status()
Check status of regexp.
Definition: TRegexp.cxx:231
EStatVal fStat
Definition: TRegexp.h:38
void CopyPattern(const TRegexp &re)
Copy the regular expression pattern.
Definition: TRegexp.cxx:128
static const unsigned fgMaxpat
Definition: TRegexp.h:39
virtual ~TRegexp()
Destructor.
Definition: TRegexp.cxx:78
TRegexp & operator=(const TRegexp &re)
Assignment operator.
Definition: TRegexp.cxx:86
const char * MakeWildcard(const char *re)
This routine transforms a wildcarding regular expression into a general regular expression used for p...
Definition: TRegexp.cxx:145
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
Definition: TRegexp.cxx:209
EStatVal
Definition: TRegexp.h:34
@ kToolong
Definition: TRegexp.h:34
@ kOK
Definition: TRegexp.h:34
TRegexp(const char *re, Bool_t wildcard=kFALSE)
Create a regular expression from the input string.
Definition: TRegexp.cxx:51
void GenPattern(const char *re)
Generate the regular expression pattern.
Definition: TRegexp.cxx:118
Basic string class.
Definition: TString.h:131
Ssiz_t Length() const
Definition: TString.h:405
friend class TSubString
Definition: TString.h:134
char & operator()(Ssiz_t i)
Definition: TString.h:709
static const Ssiz_t kNPOS
Definition: TString.h:264
const char * Data() const
Definition: TString.h:364
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2197
Bool_t IsNull() const
Definition: TString.h:402
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
A zero length substring is legal.
Definition: TString.h:77
static constexpr double s