Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TRegexp.cxx
Go to the documentation of this file.
1// @(#)root/base:$Id$
2// Author: Fons Rademakers 04/08/95
3
4/*************************************************************************
5 * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TRegexp
13\ingroup Base
14
15Regular expression class.
16
17~~~ {.cpp}
18 '^' // start-of-line anchor
19 '$' // end-of-line anchor
20 '.' // matches any character
21 '[' // start a character class
22 ']' // end a character class
23 '^' // negates character class if 1st character
24 '*' // Kleene closure (matches 0 or more)
25 '+' // Positive closure (1 or more)
26 '?' // Optional closure (0 or 1)
27~~~
28Note that the '|' operator (union) is not supported, nor are
29parentheses (grouping). Therefore "a|b" does not match "a".
30
31Standard classes like [:alnum:], [:alpha:], etc. are not supported,
32only [a-zA-Z], [^ntf] and so on.
33
34Warning: The preferred way to use regular expressions is via std::regex.
35E.g., the Index() functions may return incorrect results.
36*/
37
38#include "TRegexp.h"
39#include "TString.h"
40#include "TError.h"
41#include "ThreadLocalStorage.h"
42
43const unsigned TRegexp::fgMaxpat = 2048;
44
45
47
48////////////////////////////////////////////////////////////////////////////////
49/// Create a regular expression from the input string. If wildcard is
50/// true then the input string will first be interpreted as a wildcard
51/// expression by MakeWildcard(), and the result then interpreted as a
52/// regular expression.
53
54TRegexp::TRegexp(const char *re, Bool_t wildcard)
55{
56 if (wildcard)
58 else
59 GenPattern(re);
60}
61
62////////////////////////////////////////////////////////////////////////////////
63/// Create a regular expression from a TString.
64
66{
67 GenPattern(re.Data());
68}
69
70////////////////////////////////////////////////////////////////////////////////
71/// Copy ctor.
72
74{
76}
77
78////////////////////////////////////////////////////////////////////////////////
79/// Destructor.
80
82{
83 delete [] fPattern;
84}
85
86////////////////////////////////////////////////////////////////////////////////
87/// Assignment operator.
88
90{
91 if (this != &r) {
92 delete [] fPattern;
94 }
95 return *this;
96}
97
98////////////////////////////////////////////////////////////////////////////////
99/// Assignment operator taking a char* and assigning it to a regexp.
100
102{
103 delete [] fPattern;
104 GenPattern(str);
105 return *this;
106}
107
108////////////////////////////////////////////////////////////////////////////////
109/// Assignment operator taking a TString.
110
112{
113 delete [] fPattern;
114 GenPattern(str.Data());
115 return *this;
116}
117
118////////////////////////////////////////////////////////////////////////////////
119/// Generate the regular expression pattern.
120
121void TRegexp::GenPattern(const char *str)
122{
124 int error = ::Makepat(str, fPattern, fgMaxpat);
125 fStat = (error < 3) ? (EStatVal) error : kToolong;
126}
127
128////////////////////////////////////////////////////////////////////////////////
129/// Copy the regular expression pattern.
130
132{
134 memcpy(fPattern, r.fPattern, fgMaxpat * sizeof(Pattern_t));
135 fStat = r.fStat;
136}
137
138////////////////////////////////////////////////////////////////////////////////
139/// This routine transforms a wildcarding regular expression into
140/// a general regular expression used for pattern matching.
141/// When using wildcards the regular expression is assumed to be
142/// preceded by a "^" (BOL) and terminated by a "$" (EOL). Also, all
143/// "*"'s and "?"'s (closures) are assumed to be preceded by a "." (i.e. any
144/// character, except "/"'s) and all .'s are escaped (so *.ps is different
145/// from *.eps). The special treatment of "/" allows the easy matching of
146/// pathnames, e.g. "*.root" will match "aap.root", but not "pipo/aap.root".
147
148const char *TRegexp::MakeWildcard(const char *re)
149{
150 TTHREAD_TLS_ARRAY(char,fgMaxpat,buf);
151 char *s = buf;
152 if (!re) return "";
153 int len = strlen(re);
154 int slen = 0;
155
156 if (!len) return "";
157
158 for (int i = 0; i < len; i++) {
159 if ((unsigned)slen > fgMaxpat - 10) {
160 Error("MakeWildcard", "regexp too large");
161 break;
162 }
163 if (i == 0 && re[i] != '^') {
164 *s++ = '^';
165 slen++;
166 }
167 if (re[i] == '*') {
168#ifndef R__WIN32
169 //const char *wc = "[a-zA-Z0-9-+_\\.,: []<>]";
170 const char *wc = "[^/]";
171#else
172 //const char *wc = "[a-zA-Z0-9-+_., []<>]";
173 const char *wc = "[^\\/:]";
174#endif
175 strcpy(s, wc);
176 s += strlen(wc);
177 slen += strlen(wc);
178 }
179 if (re[i] == '.') {
180 *s++ = '\\';
181 slen++;
182 }
183 if (re[i] == '?') {
184#ifndef R__WIN32
185 //const char *wc = "[a-zA-Z0-9-+_\\.,: []<>]";
186 const char *wc = "[^/]";
187#else
188 //const char *wc = "[a-zA-Z0-9-+_., []<>]";
189 const char *wc = "[^\\/:]";
190#endif
191 strcpy(s, wc);
192 s += strlen(wc);
193 slen += strlen(wc);
194 } else {
195 *s++ = re[i];
196 slen++;
197 }
198 if (i == len-1 && re[i] != '$') {
199 *s++ = '$';
200 slen++;
201 }
202 }
203 *s = '\0';
204 return buf;
205}
206
207////////////////////////////////////////////////////////////////////////////////
208/// Find the first occurrence of the regexp in string and return the
209/// position, or -1 if there is no match. Len is length of the matched
210/// string and i is the offset at which the matching should start.
211/// Please, see the Warning in the class documentation above.
212
213Ssiz_t TRegexp::Index(const TString& string, Ssiz_t* len, Ssiz_t i) const
214{
215 if (fStat != kOK)
216 Error("TRegexp::Index", "Bad Regular Expression");
217
218 const char* startp;
219 const char* s = string.Data();
220 Ssiz_t slen = string.Length();
221 if (slen < i) return kNPOS;
222 const char* endp = ::Matchs(s+i, slen-i, fPattern, &startp);
223 if (endp) {
224 *len = endp - startp;
225 return startp - s;
226 } else {
227 *len = 0;
228 return kNPOS;
229 }
230}
231
232////////////////////////////////////////////////////////////////////////////////
233/// Check status of regexp.
234
236{
237 EStatVal temp = fStat;
238 fStat = kOK;
239 return temp;
240}
241
242////////////////////////////////////////////////////////////////////////////////
243// //
244// TString member functions, put here so the linker will include //
245// them only if regular expressions are used. //
246// //
247////////////////////////////////////////////////////////////////////////////////
248
249////////////////////////////////////////////////////////////////////////////////
250/// Find the first occurrence of the regexp in string and return the
251/// position, or -1 if there is no match. Start is the offset at which
252/// the search should start.
253/// Please, see the Warning in the class documentation above.
254
256{
257 Ssiz_t len;
258 return r.Index(*this, &len, start); // len not used
259}
260
261////////////////////////////////////////////////////////////////////////////////
262/// Find the first occurrence of the regexp in string and return the
263/// position, or -1 if there is no match. Extent is length of the matched
264/// string and start is the offset at which the matching should start.
265/// Please, see the Warning in the class documentation above.
266
267Ssiz_t TString::Index(const TRegexp& r, Ssiz_t* extent, Ssiz_t start) const
268{
269 return r.Index(*this, extent, start);
270}
271
272////////////////////////////////////////////////////////////////////////////////
273/// Return the substring found by applying the regexp starting at start.
274/// Please, see the Warning in the class documentation above.
275
277{
278 Ssiz_t len = 0;
279 Ssiz_t begin = Index(r, &len, start);
280 return TSubString(*this, begin, len);
281}
282
283////////////////////////////////////////////////////////////////////////////////
284/// Return the substring found by applying the regexp.
285
287{
288 return (*this)(r,0);
289}
290
291////////////////////////////////////////////////////////////////////////////////
292/// Search for tokens delimited by regular expression 'delim' (default " ")
293/// in this string; search starts at 'from' and the token is returned in 'tok'.
294/// Returns in 'from' the next position after the delimiter.
295/// Returns kTRUE if a token is found, kFALSE if not or if some inconsistency
296/// occurred.
297/// This method allows to loop over tokens in this way:
298/// ~~~ {.cpp}
299/// TString myl = "tok1 tok2|tok3";
300/// TString tok;
301/// Ssiz_t from = 0;
302/// while (myl.Tokenize(tok, from, "[ |]")) {
303/// // Analyse tok
304/// ...
305/// }
306/// ~~~
307/// more convenient of the other Tokenize method when saving the tokens is not
308/// needed.
309
310Bool_t TString::Tokenize(TString &tok, Ssiz_t &from, const char *delim) const
311{
312 Bool_t found = kFALSE;
313
314 // Reset the token
315 tok = "";
316
317 // Make sure inputs make sense
318 Int_t len = Length();
319 if (len <= 0 || from > (len - 1) || from < 0)
320 return found;
321
322 // Ensure backward compatibility to allow one or more times the delimiting character
323 TString rdelim(delim);
324 if(rdelim.Length() == 1) {
325 rdelim = "[" + rdelim + "]+";
326 }
327 TRegexp rg(rdelim);
328
329 // Find delimiter
330 Int_t ext = 0;
331 Int_t pos = Index(rg, &ext, from);
332
333 // Assign to token
334 if (pos == kNPOS || pos > from) {
335 Ssiz_t last = (pos != kNPOS) ? (pos - 1) : len;
336 tok = (*this)(from, last-from+1);
337 }
338 found = kTRUE;
339
340 // Update start-of-search index
341 from = pos + ext;
342 if (pos == kNPOS) {
343 from = pos;
344 if (tok.IsNull()) {
345 // Empty, last token
346 found = kFALSE;
347 }
348 }
349 // Make sure that 'from' has a meaningful value
350 from = (from < len) ? from : len;
351
352 // Done
353 return found;
354}
unsigned short Pattern_t
Definition Match.h:24
int Makepat(const char *, Pattern_t *, int)
Make a pattern template from the string pointed to by exp.
Definition Match.cxx:129
const char * Matchs(const char *, size_t len, const Pattern_t *, const char **)
Match a string with a pattern.
Definition Match.cxx:220
constexpr Bool_t kFALSE
Definition RtypesCore.h:101
constexpr Ssiz_t kNPOS
Definition RtypesCore.h:124
constexpr Bool_t kTRUE
Definition RtypesCore.h:100
#define ClassImp(name)
Definition Rtypes.h:377
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:185
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
Regular expression class.
Definition TRegexp.h:31
Pattern_t * fPattern
Definition TRegexp.h:37
EStatVal Status()
Check status of regexp.
Definition TRegexp.cxx:235
EStatVal fStat
Definition TRegexp.h:38
void CopyPattern(const TRegexp &re)
Copy the regular expression pattern.
Definition TRegexp.cxx:131
static const unsigned fgMaxpat
Definition TRegexp.h:39
virtual ~TRegexp()
Destructor.
Definition TRegexp.cxx:81
TRegexp & operator=(const TRegexp &re)
Assignment operator.
Definition TRegexp.cxx:89
const char * MakeWildcard(const char *re)
This routine transforms a wildcarding regular expression into a general regular expression used for p...
Definition TRegexp.cxx:148
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
Definition TRegexp.cxx:213
@ kToolong
Definition TRegexp.h:34
@ kOK
Definition TRegexp.h:34
TRegexp(const char *re, Bool_t wildcard=kFALSE)
Create a regular expression from the input string.
Definition TRegexp.cxx:54
void GenPattern(const char *re)
Generate the regular expression pattern.
Definition TRegexp.cxx:121
Basic string class.
Definition TString.h:139
Ssiz_t Length() const
Definition TString.h:417
friend class TSubString
Definition TString.h:142
char & operator()(Ssiz_t i)
Definition TString.h:724
static constexpr Ssiz_t kNPOS
Definition TString.h:278
const char * Data() const
Definition TString.h:376
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition TString.cxx:2264
Bool_t IsNull() const
Definition TString.h:414
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:651
A zero length substring is legal.
Definition TString.h:85