Logo ROOT   6.14/05
Reference Guide
RooStreamParser.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitCore *
4  * @(#)root/roofitcore:$Id$
5  * Authors: *
6  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * *
9  * Copyright (c) 2000-2005, Regents of the University of California *
10  * and Stanford University. All rights reserved. *
11  * *
12  * Redistribution and use in source and binary forms, *
13  * with or without modification, are permitted according to the terms *
14  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15  *****************************************************************************/
16 
17 //////////////////////////////////////////////////////////////////////////////
18 //
19 // RooStreamParser is a utility class to parse istreams into tokens and optionally
20 // convert them into basic types (double,int,string)
21 //
22 // The general tokenizing philosophy is that there are two kinds of tokens: value
23 // and punctuation. The former are variable length, the latter always
24 // one character. A token is terminated if one of the following conditions
25 // occur
26 // - space character found (' ',tab,newline)
27 // - change of token type (value -> punctuation or vv)
28 // - end of fixed-length token (punctuation only)
29 // - start or end of quoted string
30 //
31 // The parser is aware of floating point notation and will assign leading
32 // minus signs, decimal points etc to a value token when this is obvious
33 // from the context. The definition of what is punctuation can be redefined.
34 //
35 
36 
37 #include "RooFit.h"
38 
39 #include "Riostream.h"
40 #include "Riostream.h"
41 #include <stdlib.h>
42 
43 #ifndef _WIN32
44 #include <strings.h>
45 #endif
46 
47 #include "RooStreamParser.h"
48 #include "RooMsgService.h"
49 #include "RooNumber.h"
50 
51 
52 using namespace std;
53 
55 
56 
57 ////////////////////////////////////////////////////////////////////////////////
58 /// Construct parser on given input stream
59 
61  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
62 {
63 }
64 
65 
66 ////////////////////////////////////////////////////////////////////////////////
67 /// Construct parser on given input stream. Use given errorPrefix to
68 /// prefix any parsing error messages
69 
70 RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) :
71  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
72 {
73 }
74 
75 
76 
77 ////////////////////////////////////////////////////////////////////////////////
78 /// Destructor
79 
81 {
82 }
83 
84 
85 
86 ////////////////////////////////////////////////////////////////////////////////
87 /// If true, parser is at end of line in stream
88 
90 {
91  Int_t nc(_is->peek()) ;
92  return (nc=='\n'||nc==-1) ;
93 }
94 
95 
96 
97 ////////////////////////////////////////////////////////////////////////////////
98 /// Change list of characters interpreted as punctuation
99 
100 void RooStreamParser::setPunctuation(const TString& punct)
101 {
102  _punct = punct ;
103 }
104 
105 
106 
107 ////////////////////////////////////////////////////////////////////////////////
108 /// Check if given char is considered punctuation
109 
111 {
112  const char* punct = _punct.Data() ;
113  for (int i=0 ; i<_punct.Length() ; i++)
114  if (punct[i] == c) {
115  return kTRUE ;
116  }
117  return kFALSE ;
118 }
119 
120 
121 
122 ////////////////////////////////////////////////////////////////////////////////
123 /// Read one token separated by any of the known punctuation characters.
124 /// This function recognizes and handles comment lines in the istream (those
125 /// starting with '#'), quoted strings ("") the content of which is not tokenized
126 /// and '+-.' characters that are part of floating point numbers and are exempt
127 /// from being interpreted as a token separator in case '+-.' are defined as
128 /// token separators.
129 
131 {
132  // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
133  Bool_t first(kTRUE), quotedString(kFALSE), lineCont(kFALSE) ;
134  char buffer[64000], c(0), cnext = '\0', cprev = ' ';
135  Bool_t haveINF(kFALSE) ;
136  Int_t bufptr(0) ;
137 
138  // Check for end of file
139  if (_is->eof() || _is->fail()) {
140  _atEOF = kTRUE ;
141  return TString("") ;
142  }
143 
144  //Ignore leading newline
145  if (_is->peek()=='\n') {
146  _is->get(c) ;
147 
148  // If new line starts with #, zap it
149  while (_is->peek()=='#') {
150  zapToEnd(kFALSE) ;
151  _is->get(c) ; // absorb newline
152  }
153  }
154 
155  while(1) {
156  // Buffer overflow protection
157  if (bufptr >= 63999) {
159  << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl;
160  break;
161  }
162 
163  // Read next char
164  _is->get(c) ;
165 
166 
167 
168  // Terminate at EOF, EOL or trouble
169  if (_is->eof() || _is->fail() || c=='\n') break ;
170 
171  // Terminate as SPACE, unless we haven't seen any non-SPACE yet
172  if (isspace(c)) {
173  if (first)
174  continue ;
175  else
176  if (!quotedString) {
177  break ;
178  }
179  }
180 
181  // If '-' or '/' see what the next character is
182  if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
183  _is->get(cnext) ;
184 
185 
186  if (cnext=='I' || cnext=='i') {
187  char tmp1,tmp2 ;
188  _is->get(tmp1) ;
189  _is->get(tmp2) ;
190  _is->putback(tmp2) ;
191  _is->putback(tmp1) ;
192  haveINF = ((cnext=='I' && tmp1 == 'N' && tmp2 == 'F') || (cnext=='i' && tmp1 == 'n' && tmp2 == 'f')) ;
193  } else {
194  haveINF = kFALSE ;
195  }
196 
197  _is->putback(cnext) ;
198  }
199 
200 
201  // Check for line continuation marker
202  if (c=='\\' && cnext=='\\') {
203  // Kill rest of line including endline marker
204  zapToEnd(kFALSE) ;
205  _is->get(c) ;
206  lineCont=kTRUE ;
207  break ;
208  }
209 
210  // Stop if begin of comments is encountered
211  if (c=='/' && cnext=='/') {
212  zapToEnd(kFALSE) ;
213  break ;
214  }
215 
216  // Special handling of quoted strings
217  if (c=='"') {
218  if (first) {
219  quotedString=kTRUE ;
220  } else if (!quotedString) {
221  // Terminate current token. Next token will be quoted string
222  _is->putback('"') ;
223  break ;
224  }
225  }
226 
227  if (!quotedString) {
228  // Decide if next char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceding INF)
229  if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev)))
230  && !((c=='-'||c=='+') && isdigit(cnext) && cprev=='e')
231  && (!first || !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||haveINF)))) {
232 
233  if (first) {
234  // Make this a one-char punctuation token
235  buffer[bufptr++]=c ;
236  break ;
237  } else {
238  // Put back punct. char and terminate current alphanum token
239  _is->putback(c) ;
240  break ;
241  }
242  }
243  } else {
244  // Inside quoted string conventional tokenizing rules do not apply
245 
246  // Terminate token on closing quote
247  if (c=='"' && !first) {
248  buffer[bufptr++]=c ;
249  quotedString=kFALSE ;
250  break ;
251  }
252  }
253 
254  // Store in buffer
255  buffer[bufptr++]=c ;
256  first=kFALSE ;
257  cprev=c ;
258  }
259 
260  if (_is->eof() || _is->bad()) {
261  _atEOF = kTRUE ;
262  }
263 
264  // Check if closing quote was encountered
265  if (quotedString) {
266  oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
267  }
268 
269  // Absorb trailing white space or absorb rest of line if // is encountered
270  if (c=='\n') {
271  if (!lineCont) {
272  _is->putback(c) ;
273  }
274  } else {
275  c = _is->peek() ;
276 
277  while ((isspace(c) || c=='/') && c != '\n') {
278  if (c=='/') {
279  _is->get(c) ;
280  if (_is->peek()=='/') {
281  zapToEnd(kFALSE) ;
282  } else {
283  _is->putback('/') ;
284  }
285  break ;
286  } else {
287  _is->get(c) ;
288  c = _is->peek() ;
289  }
290  }
291  }
292 
293  // If no token was read line is continued, return first token on next line
294  if (bufptr==0 && lineCont) {
295  return readToken() ;
296  }
297 
298  // Zero terminate buffer and convert to TString
299  buffer[bufptr]=0 ;
300  return TString(buffer) ;
301 }
302 
303 
304 
305 ////////////////////////////////////////////////////////////////////////////////
306 /// Read an entire line from the stream and return as TString
307 /// This method recognizes the use of '\\' in the istream
308 /// as line continuation token.
309 
311 {
312  char c, buffer[64000];
313  Int_t nfree(63999);
314 
315  if (_is->peek() == '\n')
316  _is->get(c);
317 
318  // Read till end of line
319  _is->getline(buffer, nfree, '\n');
320 
321  // Look for eventual continuation line sequence
322  char *pcontseq = strstr(buffer, "\\\\");
323  if (pcontseq)
324  nfree -= (pcontseq - buffer);
325  while (pcontseq) {
326  _is->getline(pcontseq, nfree, '\n');
327 
328  char *nextpcontseq = strstr(pcontseq, "\\\\");
329  if (nextpcontseq)
330  nfree -= (nextpcontseq - pcontseq);
331  pcontseq = nextpcontseq;
332  }
333 
334  // Chop eventual comments
335  char *pcomment = strstr(buffer,"//") ;
336  if (pcomment) *pcomment=0 ;
337 
338  // Chop leading and trailing space
339  char *pstart=buffer ;
340  while (isspace(*pstart)) {
341  pstart++ ;
342  }
343  char *pend=buffer+strlen(buffer)-1 ;
344  if (pend>pstart)
345  while (isspace(*pend)) { *pend--=0 ; }
346 
347  if (_is->eof() || _is->fail()) {
348  _atEOF = kTRUE ;
349  }
350 
351  // Convert to TString
352  return TString(pstart) ;
353 }
354 
355 
356 
357 ////////////////////////////////////////////////////////////////////////////////
358 /// Eat all characters up to the end of the current line; the terminating
359 /// newline is put back into the stream. If inclContLines is kTRUE, all
360 /// continuation lines marked by the '\\' token are zapped as well
361 
363 {
364  // Skip over everything until the end of the current line
365  if (_is->peek()!='\n') {
366 
367  char buffer[64000];
368  Int_t nfree(63999);
369 
370  // Read till end of line
371  _is->getline(buffer, nfree, '\n');
372 
373  if (inclContLines) {
374  // Look for eventual continuation line sequence
375  char *pcontseq = strstr(buffer, "\\\\");
376  if (pcontseq)
377  nfree -= (pcontseq - buffer);
378  while (pcontseq) {
379  _is->getline(pcontseq, nfree, '\n');
380 
381  char *nextpcontseq = strstr(pcontseq, "\\\\");
382  if (nextpcontseq)
383  nfree -= (nextpcontseq - pcontseq);
384  pcontseq = nextpcontseq;
385  }
386  }
387 
388  // Put back newline character in stream buffer
389  _is->putback('\n') ;
390  }
391 }
392 
393 
394 
395 ////////////////////////////////////////////////////////////////////////////////
396 /// Read the next token and compare it to the given 'expected' token. Returns kTRUE if they differ (i.e. on error) and kFALSE if they are identical.
397 
398 Bool_t RooStreamParser::expectToken(const TString& expected, Bool_t zapOnError)
399 {
400  TString token(readToken()) ;
401 
402  Bool_t error=token.CompareTo(expected) ;
403  if (error && !_prefix.IsNull()) {
404  oocoutW((TObject*)0,InputArguments) << _prefix << ": parse error, expected '"
405  << expected << "'" << ", got '" << token << "'" << endl ;
406  if (zapOnError) zapToEnd(kTRUE) ;
407  }
408  return error ;
409 }
410 
411 
412 
413 ////////////////////////////////////////////////////////////////////////////////
414 /// Read the next token and convert it to a Double_t. Returns true
415 /// if an error occurred in reading or conversion
416 
418 {
419  TString token(readToken()) ;
420  if (token.IsNull()) return kTRUE ;
421  return convertToDouble(token,value) ;
422 
423 }
424 
425 
426 
427 ////////////////////////////////////////////////////////////////////////////////
428 /// Convert given string to a double. Return true if the conversion fails.
429 
430 Bool_t RooStreamParser::convertToDouble(const TString& token, Double_t& value)
431 {
432  char* endptr = 0;
433  const char* data=token.Data() ;
434 
435  // Handle +/- infinity cases, (token is guaranteed to be >1 char long)
436  if (!strcasecmp(data,"inf") || !strcasecmp(data+1,"inf")) {
437  value = (data[0]=='-') ? -RooNumber::infinity() : RooNumber::infinity() ;
438  return kFALSE ;
439  }
440 
441  value = strtod(data,&endptr) ;
442  Bool_t error = (endptr-data!=token.Length()) ;
443 
444  if (error && !_prefix.IsNull()) {
445  oocoutE((TObject*)0,InputArguments) << _prefix << ": parse error, cannot convert '"
446  << token << "'" << " to double precision" << endl ;
447  }
448  return error ;
449 }
450 
451 
452 
453 ////////////////////////////////////////////////////////////////////////////////
454 /// Read a token and convert it to an Int_t. Returns true
455 /// if an error occurred in reading or conversion
456 
458 {
459  TString token(readToken()) ;
460  if (token.IsNull()) return kTRUE ;
461  return convertToInteger(token,value) ;
462 }
463 
464 
465 
466 ////////////////////////////////////////////////////////////////////////////////
467 /// Convert given string to an Int_t. Returns true if an error
468 /// occurred in conversion
469 
470 Bool_t RooStreamParser::convertToInteger(const TString& token, Int_t& value)
471 {
472  char* endptr = 0;
473  const char* data=token.Data() ;
474  value = strtol(data,&endptr,10) ;
475  Bool_t error = (endptr-data!=token.Length()) ;
476 
477  if (error && !_prefix.IsNull()) {
478  oocoutE((TObject*)0,InputArguments)<< _prefix << ": parse error, cannot convert '"
479  << token << "'" << " to integer" << endl ;
480  }
481  return error ;
482 }
483 
484 
485 
486 ////////////////////////////////////////////////////////////////////////////////
487 /// Read a string token. Returns true if an error occurred in reading
488 /// or conversion. If the read token is enclosed in quotation
489 /// marks, those are stripped in the returned value
490 
491 Bool_t RooStreamParser::readString(TString& value, Bool_t /*zapOnError*/)
492 {
493  TString token(readToken()) ;
494  if (token.IsNull()) return kTRUE ;
495  return convertToString(token,value) ;
496 }
497 
498 
499 
500 ////////////////////////////////////////////////////////////////////////////////
501 /// Convert given token to a string (i.e. remove eventual quotation marks)
502 
503 Bool_t RooStreamParser::convertToString(const TString& token, TString& string)
504 {
505  // Transport to buffer
506  char buffer[64000], *ptr;
507  strncpy(buffer, token.Data(), 63999);
508  if (token.Length() >= 63999) {
509  oocoutW((TObject *)0, InputArguments) << "RooStreamParser::convertToString: token length exceeds 63999, truncated"
510  << endl;
511  buffer[63999] = 0;
512  }
513  int len = strlen(buffer) ;
514 
515  // Remove trailing quote if any
516  if ((len) && (buffer[len-1]=='"'))
517  buffer[len-1]=0 ;
518 
519  // Skip leading quote, if present
520  ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
521 
522  string = ptr ;
523  return kFALSE ;
524 }
Bool_t isPunctChar(char c) const
Check if given char is considered punctuation.
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
STL namespace.
#define oocoutE(o, a)
Definition: RooMsgService.h:47
Bool_t convertToDouble(const TString &token, Double_t &value)
Convert given string to a double. Return true if the conversion fails.
TString readToken()
Read one token separated by any of the know punctuation characters This function recognizes and handl...
Bool_t convertToInteger(const TString &token, Int_t &value)
Convert given string to an Int_t.
static Double_t infinity()
Return internal infinity representation.
Definition: RooNumber.cxx:49
void zapToEnd(Bool_t inclContLines=kFALSE)
Eat all characters up to and including then end of the current line.
RooStreamParser(std::istream &is)
Construct parser on given input stream.
Bool_t readDouble(Double_t &value, Bool_t zapOnError=kFALSE)
Read the next token and convert it to a Double_t.
TString readLine()
Read an entire line from the stream and return as TString This method recognizes the use of '\' in th...
Bool_t expectToken(const TString &expected, Bool_t zapOnError=kFALSE)
Read the next token and compare it to the given 'expected' token; returns kTRUE if they differ (i.e. on error).
Bool_t readString(TString &value, Bool_t zapOnError=kFALSE)
Read a string token.
const Bool_t kFALSE
Definition: RtypesCore.h:88
#define ClassImp(name)
Definition: Rtypes.h:359
double Double_t
Definition: RtypesCore.h:55
Bool_t convertToString(const TString &token, TString &string)
Convert given token to a string (i.e. remove eventual quotation marks)
void setPunctuation(const TString &punct)
Change list of characters interpreted as punctuation.
std::istream * _is
#define oocoutW(o, a)
Definition: RooMsgService.h:46
Mother of all ROOT objects.
Definition: TObject.h:37
virtual ~RooStreamParser()
Destructor.
Bool_t readInteger(Int_t &value, Bool_t zapOnError=kFALSE)
Read a token and convert it to an Int_t.
#define c(i)
Definition: RSha256.hxx:101
Definition: first.py:1
Bool_t atEOL()
If true, parser is at end of line in stream.
const Bool_t kTRUE
Definition: RtypesCore.h:87