ROOT  6.06/09
Reference Guide
TRegexp.cxx
Go to the documentation of this file.
1 // @(#)root/base:$Id$
2 // Author: Fons Rademakers 04/08/95
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class TRegexp
13 
14 Regular expression class.
15 
16 ~~~ {.cpp}
17  '^' // start-of-line anchor
18  '$' // end-of-line anchor
19  '.' // matches any character
20  '[' // start a character class
21  ']' // end a character class
22  '^' // negates character class if 1st character
23  '*' // Kleene closure (matches 0 or more)
24  '+' // Positive closure (1 or more)
25  '?' // Optional closure (0 or 1)
26 ~~~
27 Note that the '|' operator (union) is not supported, nor are
28 parentheses (grouping). Therefore "a|b" does not match "a".
29 
30 Standard classes like [:alnum:], [:alpha:], etc. are not supported,
31 only [a-zA-Z], [^ntf] and so on.
32 */
33 
34 #include "TRegexp.h"
35 #include "TString.h"
36 #include "TError.h"
37 #include "ThreadLocalStorage.h"
38 
39 const unsigned TRegexp::fgMaxpat = 2048;
40 
41 
43 
44 ////////////////////////////////////////////////////////////////////////////////
45 /// Create a regular expression from the input string. If wildcard is
46 /// true then the input string will first be interpreted as a wildcard
47 /// expression by MakeWildcard(), and the result then interpreted as a
48 /// regular expression.
49 
50 TRegexp::TRegexp(const char *re, Bool_t wildcard)
51 {
52  if (wildcard)
53  GenPattern(MakeWildcard(re));
54  else
55  GenPattern(re);
56 }
57 
58 ////////////////////////////////////////////////////////////////////////////////
59 /// Create a regular expression from a TString.
60 
62 {
63  GenPattern(re.Data());
64 }
65 
66 ////////////////////////////////////////////////////////////////////////////////
67 /// Copy ctor.
68 
70 {
71  CopyPattern(r);
72 }
73 
74 ////////////////////////////////////////////////////////////////////////////////
75 /// Destructor.
76 
78 {
79  delete [] fPattern;
80 }
81 
82 ////////////////////////////////////////////////////////////////////////////////
83 /// Assignment operator.
84 
86 {
87  if (this != &r) {
88  delete [] fPattern;
89  CopyPattern(r);
90  }
91  return *this;
92 }
93 
94 ////////////////////////////////////////////////////////////////////////////////
95 /// Assignment operator taking a char* and assigning it to a regexp.
96 
97 TRegexp& TRegexp::operator=(const char *str)
98 {
99  delete [] fPattern;
100  GenPattern(str);
101  return *this;
102 }
103 
104 ////////////////////////////////////////////////////////////////////////////////
105 /// Assignment operator taking a TString.
106 
108 {
109  delete [] fPattern;
110  GenPattern(str.Data());
111  return *this;
112 }
113 
114 ////////////////////////////////////////////////////////////////////////////////
115 /// Generate the regular expression pattern.
116 
117 void TRegexp::GenPattern(const char *str)
118 {
119  fPattern = new Pattern_t[fgMaxpat];
120  int error = ::Makepat(str, fPattern, fgMaxpat);
121  fStat = (error < 3) ? (EStatVal) error : kToolong;
122 }
123 
124 ////////////////////////////////////////////////////////////////////////////////
125 /// Copy the regular expression pattern.
126 
128 {
129  fPattern = new Pattern_t[fgMaxpat];
130  memcpy(fPattern, r.fPattern, fgMaxpat * sizeof(Pattern_t));
131  fStat = r.fStat;
132 }
133 
134 ////////////////////////////////////////////////////////////////////////////////
135 /// This routine transforms a wildcarding regular expression into
136 /// a general regular expression used for pattern matching.
137 /// When using wildcards the regular expression is assumed to be
138 /// preceded by a "^" (BOL) and terminated by a "$" (EOL). Also, all
139 /// "*"'s and "?"'s (closures) are assumed to be preceded by a "." (i.e. any
140 /// character, except "/"'s) and all .'s are escaped (so *.ps is different
141 /// from *.eps). The special treatment of "/" allows the easy matching of
142 /// pathnames, e.g. "*.root" will match "aap.root", but not "pipo/aap.root".
143 
144 const char *TRegexp::MakeWildcard(const char *re)
145 {
146  TTHREAD_TLS_ARRAY(char,fgMaxpat,buf);
147  char *s = buf;
148  if (!re) return "";
149  int len = strlen(re);
150  int slen = 0;
151 
152  if (!len) return "";
153 
154  for (int i = 0; i < len; i++) {
155  if ((unsigned)slen > fgMaxpat - 10) {
156  Error("MakeWildcard", "regexp too large");
157  break;
158  }
159  if (i == 0 && re[i] != '^') {
160  *s++ = '^';
161  slen++;
162  }
163  if (re[i] == '*') {
164 #ifndef R__WIN32
165  //const char *wc = "[a-zA-Z0-9-+_\\.,: []<>]";
166  const char *wc = "[^/]";
167 #else
168  //const char *wc = "[a-zA-Z0-9-+_., []<>]";
169  const char *wc = "[^\\/:]";
170 #endif
171  strcpy(s, wc);
172  s += strlen(wc);
173  slen += strlen(wc);
174  }
175  if (re[i] == '.') {
176  *s++ = '\\';
177  slen++;
178  }
179  if (re[i] == '?') {
180 #ifndef R__WIN32
181  //const char *wc = "[a-zA-Z0-9-+_\\.,: []<>]";
182  const char *wc = "[^/]";
183 #else
184  //const char *wc = "[a-zA-Z0-9-+_., []<>]";
185  const char *wc = "[^\\/:]";
186 #endif
187  strcpy(s, wc);
188  s += strlen(wc);
189  slen += strlen(wc);
190  } else {
191  *s++ = re[i];
192  slen++;
193  }
194  if (i == len-1 && re[i] != '$') {
195  *s++ = '$';
196  slen++;
197  }
198  }
199  *s = '\0';
200  return buf;
201 }
202 
203 ////////////////////////////////////////////////////////////////////////////////
204 /// Find the first occurrence of the regexp in string and return the
205 /// position, or -1 if there is no match. Len is length of the matched
206 /// string and i is the offset at which the matching should start.
207 
208 Ssiz_t TRegexp::Index(const TString& string, Ssiz_t* len, Ssiz_t i) const
209 {
210  if (fStat != kOK)
211  Error("TRegexp::Index", "Bad Regular Expression");
212 
213  const char* startp;
214  const char* s = string.Data();
215  Ssiz_t slen = string.Length();
216  if (slen < i) return kNPOS;
217  const char* endp = ::Matchs(s+i, slen-i, fPattern, &startp);
218  if (endp) {
219  *len = endp - startp;
220  return startp - s;
221  } else {
222  *len = 0;
223  return kNPOS;
224  }
225 }
226 
227 ////////////////////////////////////////////////////////////////////////////////
228 /// Check status of regexp.
229 
231 {
232  EStatVal temp = fStat;
233  fStat = kOK;
234  return temp;
235 }
236 
237 ////////////////////////////////////////////////////////////////////////////////
238 // //
239 // TString member functions, put here so the linker will include //
240 // them only if regular expressions are used. //
241 // //
242 ////////////////////////////////////////////////////////////////////////////////
243 
244 ////////////////////////////////////////////////////////////////////////////////
245 /// Find the first occurrence of the regexp in string and return the
246 /// position, or -1 if there is no match. Start is the offset at which
247 /// the search should start.
248 
249 Ssiz_t TString::Index(const TRegexp& r, Ssiz_t start) const
250 {
251  Ssiz_t len;
252  return r.Index(*this, &len, start); // len not used
253 }
254 
255 ////////////////////////////////////////////////////////////////////////////////
256 /// Find the first occurrence of the regexp in string and return the
257 /// position, or -1 if there is no match. Extent is length of the matched
258 /// string and start is the offset at which the matching should start.
259 
260 Ssiz_t TString::Index(const TRegexp& r, Ssiz_t* extent, Ssiz_t start) const
261 {
262  return r.Index(*this, extent, start);
263 }
264 
265 ////////////////////////////////////////////////////////////////////////////////
266 /// Return the substring found by applying the regexp starting at start.
267 
269 {
270  Ssiz_t len;
271  Ssiz_t begin = Index(r, &len, start);
272  return TSubString(*this, begin, len);
273 }
274 
275 ////////////////////////////////////////////////////////////////////////////////
276 /// Return the substring found by applying the regexp.
277 
279 {
280  return (*this)(r,0);
281 }
282 
283 ////////////////////////////////////////////////////////////////////////////////
284 /// Search for tokens delimited by regular expression 'delim' (default " ")
285 /// in this string; search starts at 'from' and the token is returned in 'tok'.
286 /// Returns in 'from' the next position after the delimiter.
287 /// Returns kTRUE if a token is found, kFALSE if not or if some inconsistency
288 /// occurred.
289 /// This method allows to loop over tokens in this way:
290 /// ~~~ {.cpp}
291 /// TString myl = "tok1 tok2|tok3";
292 /// TString tok;
293 /// Ssiz_t from = 0;
294 /// while (myl.Tokenize(tok, from, "[ |]")) {
295 /// // Analyse tok
296 /// ...
297 /// }
298 /// ~~~
299 /// more convenient of the other Tokenize method when saving the tokens is not
300 /// needed.
301 
302 Bool_t TString::Tokenize(TString &tok, Ssiz_t &from, const char *delim) const
303 {
304  Bool_t found = kFALSE;
305 
306  // Reset the token
307  tok = "";
308 
309  // Make sure inputs make sense
310  Int_t len = Length();
311  if (len <= 0 || from > (len - 1) || from < 0)
312  return found;
313 
314  TRegexp rg(delim);
315 
316  // Find delimiter
317  Int_t ext = 0;
318  Int_t pos = Index(rg, &ext, from);
319 
320  // Assign to token
321  if (pos == kNPOS || pos > from) {
322  Ssiz_t last = (pos != kNPOS) ? (pos - 1) : len;
323  tok = (*this)(from, last-from+1);
324  }
325  found = kTRUE;
326 
327  // Update start-of-search index
328  from = pos + ext;
329  if (pos == kNPOS) {
330  from = pos;
331  if (tok.IsNull()) {
332  // Empty, last token
333  found = kFALSE;
334  }
335  }
336  // Make sure that 'from' has a meaningful value
337  from = (from < len) ? from : len;
338 
339  // Done
340  return found;
341 }
A zero length substring is legal.
Definition: TString.h:83
TRegexp(const char *re, Bool_t wildcard=kFALSE)
Pattern_t * fPattern
Definition: TRegexp.h:41
EStatVal
Definition: TRegexp.h:38
TRegexp & operator=(const TRegexp &re)
Assignment operator.
Definition: TRegexp.cxx:85
Ssiz_t Length() const
Definition: TString.h:390
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
Definition: TRegexp.cxx:208
Regular expression class.
Definition: TRegexp.h:35
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
void GenPattern(const char *re)
Generate the regular expression pattern.
Definition: TRegexp.cxx:117
const char * Data() const
Definition: TString.h:349
int Makepat(const char *, Pattern_t *, int)
Make a pattern template from the string pointed to by exp.
Definition: Match.cxx:129
void Error(const char *location, const char *msgfmt,...)
char & operator()(Ssiz_t i)
Definition: TString.h:657
unsigned short Pattern_t
Definition: Match.h:26
EStatVal fStat
Definition: TRegexp.h:42
ROOT::R::TRInterface & r
Definition: Object.C:4
const char * MakeWildcard(const char *re)
This routine transforms a wildcarding regular expression into a general regular expression used for p...
Definition: TRegexp.cxx:144
Bool_t IsNull() const
Definition: TString.h:387
const char * Matchs(const char *, size_t len, const Pattern_t *, const char **)
Match a string with a pattern.
Definition: Match.cxx:220
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2240
static const Ssiz_t kNPOS
Definition: TString.h:258
int Ssiz_t
Definition: RtypesCore.h:63
void CopyPattern(const TRegexp &re)
Copy the regular expression pattern.
Definition: TRegexp.cxx:127
virtual ~TRegexp()
Destructor.
Definition: TRegexp.cxx:77
ClassImp(TRegexp) TRegexp
Create a regular expression from the input string.
Definition: TRegexp.cxx:42
const Ssiz_t kNPOS
Definition: Rtypes.h:115
EStatVal Status()
Check status of regexp.
Definition: TRegexp.cxx:230
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:582
const Bool_t kTRUE
Definition: Rtypes.h:91
friend class TSubString
Definition: TString.h:140
static const unsigned fgMaxpat
Definition: TRegexp.h:43