Logo ROOT  
Reference Guide
RooStreamParser.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitCore *
4  * @(#)root/roofitcore:$Id$
5  * Authors: *
6  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * *
9  * Copyright (c) 2000-2005, Regents of the University of California *
10  * and Stanford University. All rights reserved. *
11  * *
12  * Redistribution and use in source and binary forms, *
13  * with or without modification, are permitted according to the terms *
14  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15  *****************************************************************************/
16 
17 //////////////////////////////////////////////////////////////////////////////
18 //
19 // RooStreamParser is a utility class to parse istreams into tokens and optionally
20 // convert them into basic types (double,int,string)
21 //
22 // The general tokenizing philosophy is that there are two kinds of tokens: value
23 // and punctuation. The former are variable length, the latter always
24 // one character. A token is terminated if one of the following conditions
25 // occur
26 // - space character found (' ',tab,newline)
27 // - change of token type (value -> punctuation or vv)
28 // - end of fixed-length token (punctuation only)
29 // - start or end of quoted string
30 //
31 // The parser is aware of floating point notation and will assign leading
32 // minus signs, decimal points etc to a value token when this is obvious
33 // from the context. The definition of what is punctuation can be redefined.
34 //
35 
36 
37 #include "RooFit.h"
38 
39 #include "Riostream.h"
40 #include <stdlib.h>
41 
42 #ifndef _WIN32
43 #include <strings.h>
44 #endif
45 
46 #include "RooStreamParser.h"
47 #include "RooMsgService.h"
48 #include "RooNumber.h"
49 
50 
51 using namespace std;
52 
54 
55 
56 ////////////////////////////////////////////////////////////////////////////////
57 /// Construct parser on given input stream
58 
60  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
61 {
62 }
63 
64 
65 ////////////////////////////////////////////////////////////////////////////////
66 /// Construct parser on given input stream. Use given errorPrefix to
67 /// prefix any parsing error messages
68 
69 RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) :
70  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
71 {
72 }
73 
74 
75 
76 ////////////////////////////////////////////////////////////////////////////////
77 /// Destructor
78 
80 {
81 }
82 
83 
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 /// If true, parser is at end of line in stream
87 
89 {
90  Int_t nc(_is->peek()) ;
91  return (nc=='\n'||nc==-1) ;
92 }
93 
94 
95 
96 ////////////////////////////////////////////////////////////////////////////////
97 /// Change list of characters interpreted as punctuation
98 
100 {
101  _punct = punct ;
102 }
103 
104 
105 
106 ////////////////////////////////////////////////////////////////////////////////
107 /// Check if given char is considered punctuation
108 
110 {
111  const char* punct = _punct.Data() ;
112  for (int i=0 ; i<_punct.Length() ; i++)
113  if (punct[i] == c) {
114  return kTRUE ;
115  }
116  return kFALSE ;
117 }
118 
119 
120 
121 ////////////////////////////////////////////////////////////////////////////////
122 /// Read one token separated by any of the known punctuation characters.
123 /// This function recognizes and handles comment lines in the istream (those
124 /// starting with '#'), quoted strings ("") the content of which is not tokenized,
125 /// and '+-.' characters that are part of a floating point number and are exempt
126 /// from being interpreted as a token separator in case '+-.' are defined as
127 /// token separators.
128 
130 {
131  // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
132  Bool_t first(kTRUE), quotedString(kFALSE), lineCont(kFALSE) ;
133  char buffer[64000], c(0), cnext = '\0', cprev = ' ';
134  Bool_t haveINF(kFALSE) ;
135  Int_t bufptr(0) ;
136 
137  // Check for end of file
138  if (_is->eof() || _is->fail()) {
139  _atEOF = kTRUE ;
140  return TString("") ;
141  }
142 
143  //Ignore leading newline
144  if (_is->peek()=='\n') {
145  _is->get(c) ;
146 
147  // If new line starts with #, zap it
148  while (_is->peek()=='#') {
149  zapToEnd(kFALSE) ;
150  _is->get(c) ; // absorb newline
151  }
152  }
153 
154  while(1) {
155  // Buffer overflow protection
156  if (bufptr >= 63999) {
158  << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl;
159  break;
160  }
161 
162  // Read next char
163  _is->get(c) ;
164 
165 
166 
167  // Terminate at EOF, EOL or trouble
168  if (_is->eof() || _is->fail() || c=='\n') break ;
169 
170  // Terminate as SPACE, unless we haven't seen any non-SPACE yet
171  if (isspace(c)) {
172  if (first)
173  continue ;
174  else
175  if (!quotedString) {
176  break ;
177  }
178  }
179 
180  // If '.', '-', '+', '/' or '\\' see what the next character is
181  if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
182  _is->get(cnext) ;
183 
184 
185  if (cnext=='I' || cnext=='i') {
186  char tmp1,tmp2 ;
187  _is->get(tmp1) ;
188  _is->get(tmp2) ;
189  _is->putback(tmp2) ;
190  _is->putback(tmp1) ;
191  haveINF = ((cnext=='I' && tmp1 == 'N' && tmp2 == 'F') || (cnext=='i' && tmp1 == 'n' && tmp2 == 'f')) ;
192  } else {
193  haveINF = kFALSE ;
194  }
195 
196  _is->putback(cnext) ;
197  }
198 
199 
200  // Check for line continuation marker
201  if (c=='\\' && cnext=='\\') {
202  // Kill rest of line including endline marker
203  zapToEnd(kFALSE) ;
204  _is->get(c) ;
205  lineCont=kTRUE ;
206  break ;
207  }
208 
209  // Stop if begin of comments is encountered
210  if (c=='/' && cnext=='/') {
211  zapToEnd(kFALSE) ;
212  break ;
213  }
214 
215  // Special handling of quoted strings
216  if (c=='"') {
217  if (first) {
218  quotedString=kTRUE ;
219  } else if (!quotedString) {
220  // Terminate current token. Next token will be quoted string
221  _is->putback('"') ;
222  break ;
223  }
224  }
225 
226  if (!quotedString) {
227  // Decide if next char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceding INF)
228  if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev)))
229  && !((c=='-'||c=='+') && isdigit(cnext) && (cprev == 'e' || cprev == 'E'))
230  && (!first || !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||haveINF)))) {
231 
232  if (first) {
233  // Make this a one-char punctuation token
234  buffer[bufptr++]=c ;
235  break ;
236  } else {
237  // Put back punct. char and terminate current alphanum token
238  _is->putback(c) ;
239  break ;
240  }
241  }
242  } else {
243  // Inside quoted string conventional tokenizing rules do not apply
244 
245  // Terminate token on closing quote
246  if (c=='"' && !first) {
247  buffer[bufptr++]=c ;
248  quotedString=kFALSE ;
249  break ;
250  }
251  }
252 
253  // Store in buffer
254  buffer[bufptr++]=c ;
255  first=kFALSE ;
256  cprev=c ;
257  }
258 
259  if (_is->eof() || _is->bad()) {
260  _atEOF = kTRUE ;
261  }
262 
263  // Check if closing quote was encountered
264  if (quotedString) {
265  oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
266  }
267 
268  // Absorb trailing white space or absorb rest of line if // is encountered
269  if (c=='\n') {
270  if (!lineCont) {
271  _is->putback(c) ;
272  }
273  } else {
274  c = _is->peek() ;
275 
276  while ((isspace(c) || c=='/') && c != '\n') {
277  if (c=='/') {
278  _is->get(c) ;
279  if (_is->peek()=='/') {
280  zapToEnd(kFALSE) ;
281  } else {
282  _is->putback('/') ;
283  }
284  break ;
285  } else {
286  _is->get(c) ;
287  c = _is->peek() ;
288  }
289  }
290  }
291 
292  // If no token was read line is continued, return first token on next line
293  if (bufptr==0 && lineCont) {
294  return readToken() ;
295  }
296 
297  // Zero terminate buffer and convert to TString
298  buffer[bufptr]=0 ;
299  return TString(buffer) ;
300 }
301 
302 
303 
304 ////////////////////////////////////////////////////////////////////////////////
305 /// Read an entire line from the stream and return as TString
306 /// This method recognizes the use of '\\' in the istream
307 /// as line continuation token.
308 
310 {
311  char c, buffer[64000];
312  Int_t nfree(63999);
313 
314  if (_is->peek() == '\n')
315  _is->get(c);
316 
317  // Read till end of line
318  _is->getline(buffer, nfree, '\n');
319 
320  // Look for eventual continuation line sequence
321  char *pcontseq = strstr(buffer, "\\\\");
322  if (pcontseq)
323  nfree -= (pcontseq - buffer);
324  while (pcontseq) {
325  _is->getline(pcontseq, nfree, '\n');
326 
327  char *nextpcontseq = strstr(pcontseq, "\\\\");
328  if (nextpcontseq)
329  nfree -= (nextpcontseq - pcontseq);
330  pcontseq = nextpcontseq;
331  }
332 
333  // Chop eventual comments
334  char *pcomment = strstr(buffer,"//") ;
335  if (pcomment) *pcomment=0 ;
336 
337  // Chop leading and trailing space
338  char *pstart=buffer ;
339  while (isspace(*pstart)) {
340  pstart++ ;
341  }
342  char *pend=buffer+strlen(buffer)-1 ;
343  if (pend>pstart)
344  while (isspace(*pend)) { *pend--=0 ; }
345 
346  if (_is->eof() || _is->fail()) {
347  _atEOF = kTRUE ;
348  }
349 
350  // Convert to TString
351  return TString(pstart) ;
352 }
353 
354 
355 
356 ////////////////////////////////////////////////////////////////////////////////
357 /// Eat all characters up to the end of the current line; a newline
358 /// character is put back in the stream afterwards. If inclContLines is kTRUE,
359 /// all continuation lines marked by the '\\' token are zapped as well
360 
362 {
363  // Skip over everything until the end of the current line
364  if (_is->peek()!='\n') {
365 
366  char buffer[64000];
367  Int_t nfree(63999);
368 
369  // Read till end of line
370  _is->getline(buffer, nfree, '\n');
371 
372  if (inclContLines) {
373  // Look for eventual continuation line sequence
374  char *pcontseq = strstr(buffer, "\\\\");
375  if (pcontseq)
376  nfree -= (pcontseq - buffer);
377  while (pcontseq) {
378  _is->getline(pcontseq, nfree, '\n');
379 
380  char *nextpcontseq = strstr(pcontseq, "\\\\");
381  if (nextpcontseq)
382  nfree -= (nextpcontseq - pcontseq);
383  pcontseq = nextpcontseq;
384  }
385  }
386 
387  // Put back newline character in stream buffer
388  _is->putback('\n') ;
389  }
390 }
391 
392 
393 
394 ////////////////////////////////////////////////////////////////////////////////
395 /// Read the next token and return kTRUE if it is identical to the given 'expected' token.
396 
397 Bool_t RooStreamParser::expectToken(const TString& expected, Bool_t zapOnError)
398 {
399  TString token(readToken()) ;
400 
401  Bool_t error=token.CompareTo(expected) ;
402  if (error && !_prefix.IsNull()) {
403  oocoutW((TObject*)0,InputArguments) << _prefix << ": parse error, expected '"
404  << expected << "'" << ", got '" << token << "'" << endl ;
405  if (zapOnError) zapToEnd(kTRUE) ;
406  }
407  return error ;
408 }
409 
410 
411 
412 ////////////////////////////////////////////////////////////////////////////////
413 /// Read the next token and convert it to a Double_t. Returns true
414 /// if an error occurred in reading or conversion
415 
417 {
418  TString token(readToken()) ;
419  if (token.IsNull()) return kTRUE ;
420  return convertToDouble(token,value) ;
421 
422 }
423 
424 
425 
426 ////////////////////////////////////////////////////////////////////////////////
427 /// Convert given string to a double. Return true if the conversion fails.
428 
430 {
431  char* endptr = 0;
432  const char* data=token.Data() ;
433 
434  // Handle +/- infinity cases: "inf" alone or preceded by one character, case-insensitive (data+1 assumes a non-empty token, as ensured by readDouble)
435  if (!strcasecmp(data,"inf") || !strcasecmp(data+1,"inf")) {
436  value = (data[0]=='-') ? -RooNumber::infinity() : RooNumber::infinity() ;
437  return kFALSE ;
438  }
439 
440  value = strtod(data,&endptr) ;
441  Bool_t error = (endptr-data!=token.Length()) ;
442 
443  if (error && !_prefix.IsNull()) {
444  oocoutE((TObject*)0,InputArguments) << _prefix << ": parse error, cannot convert '"
445  << token << "'" << " to double precision" << endl ;
446  }
447  return error ;
448 }
449 
450 
451 
452 ////////////////////////////////////////////////////////////////////////////////
453 /// Read a token and convert it to an Int_t. Returns true
454 /// if an error occurred in reading or conversion
455 
457 {
458  TString token(readToken()) ;
459  if (token.IsNull()) return kTRUE ;
460  return convertToInteger(token,value) ;
461 }
462 
463 
464 
465 ////////////////////////////////////////////////////////////////////////////////
466 /// Convert given string to an Int_t. Returns true if an error
467 /// occurred in conversion
468 
470 {
471  char* endptr = 0;
472  const char* data=token.Data() ;
473  value = strtol(data,&endptr,10) ;
474  Bool_t error = (endptr-data!=token.Length()) ;
475 
476  if (error && !_prefix.IsNull()) {
477  oocoutE((TObject*)0,InputArguments)<< _prefix << ": parse error, cannot convert '"
478  << token << "'" << " to integer" << endl ;
479  }
480  return error ;
481 }
482 
483 
484 
485 ////////////////////////////////////////////////////////////////////////////////
486 /// Read a string token. Returns true if an error occurred in reading
487 /// or conversion. If the read token is enclosed in quotation
488 /// marks those are stripped in the returned value
489 
491 {
492  TString token(readToken()) ;
493  if (token.IsNull()) return kTRUE ;
494  return convertToString(token,value) ;
495 }
496 
497 
498 
499 ////////////////////////////////////////////////////////////////////////////////
500 /// Convert given token to a string (i.e. remove eventual quotation marks)
501 
503 {
504  // Transport to buffer
505  char buffer[64000], *ptr;
506  strncpy(buffer, token.Data(), 63999);
507  if (token.Length() >= 63999) {
508  oocoutW((TObject *)0, InputArguments) << "RooStreamParser::convertToString: token length exceeds 63999, truncated"
509  << endl;
510  buffer[63999] = 0;
511  }
512  int len = strlen(buffer) ;
513 
514  // Remove trailing quote if any
515  if ((len) && (buffer[len-1]=='"'))
516  buffer[len-1]=0 ;
517 
518  // Skip leading quote, if present
519  ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
520 
521  string = ptr ;
522  return kFALSE ;
523 }
RooStreamParser.h
c
#define c(i)
Definition: RSha256.hxx:101
RooStreamParser::readString
Bool_t readString(TString &value, Bool_t zapOnError=kFALSE)
Read a string token.
Definition: RooStreamParser.cxx:490
first
Definition: first.py:1
RooStreamParser::zapToEnd
void zapToEnd(Bool_t inclContLines=kFALSE)
Eat all characters up to and including then end of the current line.
Definition: RooStreamParser.cxx:361
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:100
RooStreamParser::~RooStreamParser
virtual ~RooStreamParser()
Destructor.
Definition: RooStreamParser.cxx:79
RooMsgService.h
RooStreamParser::expectToken
Bool_t expectToken(const TString &expected, Bool_t zapOnError=kFALSE)
Read the next token and return kTRUE if it is identical to the given 'expected' token.
Definition: RooStreamParser.cxx:397
RooFit.h
RooFit::InputArguments
@ InputArguments
Definition: RooGlobalFunc.h:61
TString::Data
const char * Data() const
Definition: TString.h:369
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
RooStreamParser::RooStreamParser
RooStreamParser(std::istream &is)
Construct parser on given input stream.
Definition: RooStreamParser.cxx:59
RooStreamParser::atEOL
Bool_t atEOL()
If true, parser is at end of line in stream.
Definition: RooStreamParser.cxx:88
TString::Length
Ssiz_t Length() const
Definition: TString.h:410
RooStreamParser::readToken
TString readToken()
Read one token separated by any of the know punctuation characters This function recognizes and handl...
Definition: RooStreamParser.cxx:129
oocoutE
#define oocoutE(o, a)
Definition: RooMsgService.h:48
TString
Basic string class.
Definition: TString.h:136
RooStreamParser::readInteger
Bool_t readInteger(Int_t &value, Bool_t zapOnError=kFALSE)
Read a token and convert it to an Int_t.
Definition: RooStreamParser.cxx:456
bool
RooStreamParser::_prefix
TString _prefix
Definition: RooStreamParser.h:54
RooStreamParser
Definition: RooStreamParser.h:21
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:101
RooStreamParser::convertToDouble
Bool_t convertToDouble(const TString &token, Double_t &value)
Convert given string to a double. Return true if the conversion fails.
Definition: RooStreamParser.cxx:429
RooStreamParser::readDouble
Bool_t readDouble(Double_t &value, Bool_t zapOnError=kFALSE)
Read the next token and convert it to a Double_t.
Definition: RooStreamParser.cxx:416
RooStreamParser::isPunctChar
Bool_t isPunctChar(char c) const
Check if given char is considered punctuation.
Definition: RooStreamParser.cxx:109
RooStreamParser::_atEOF
Bool_t _atEOF
Definition: RooStreamParser.h:53
oocoutW
#define oocoutW(o, a)
Definition: RooMsgService.h:47
RooStreamParser::convertToString
Bool_t convertToString(const TString &token, TString &string)
Convert given token to a string (i.e. remove eventual quotation marks)
Definition: RooStreamParser.cxx:502
RooNumber.h
RooStreamParser::readLine
TString readLine()
Read an entire line from the stream and return as TString This method recognizes the use of '\' in th...
Definition: RooStreamParser.cxx:309
RooStreamParser::setPunctuation
void setPunctuation(const TString &punct)
Change list of characters interpreted as punctuation.
Definition: RooStreamParser.cxx:99
RooNumber::infinity
static Double_t infinity()
Return internal infinity representation.
Definition: RooNumber.cxx:49
TString::CompareTo
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:439
TString::IsNull
Bool_t IsNull() const
Definition: TString.h:407
Double_t
double Double_t
Definition: RtypesCore.h:59
TObject
Mother of all ROOT objects.
Definition: TObject.h:37
RooStreamParser::convertToInteger
Bool_t convertToInteger(const TString &token, Int_t &value)
Convert given string to an Int_t.
Definition: RooStreamParser.cxx:469
RooStreamParser::_is
std::istream * _is
Definition: RooStreamParser.h:51
RooStreamParser::_punct
TString _punct
Definition: RooStreamParser.h:55
Riostream.h
int