Logo ROOT   6.08/07
Reference Guide
RooStreamParser.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitCore *
4  * @(#)root/roofitcore:$Id$
5  * Authors: *
6  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * *
9  * Copyright (c) 2000-2005, Regents of the University of California *
10  * and Stanford University. All rights reserved. *
11  * *
12  * Redistribution and use in source and binary forms, *
13  * with or without modification, are permitted according to the terms *
14  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15  *****************************************************************************/
16 
17 //////////////////////////////////////////////////////////////////////////////
18 //
19 // RooStreamParser is a utility class to parse istreams into tokens and optionally
20 // convert them into basic types (double,int,string)
21 //
22 // The general tokenizing philosophy is that there are two kinds of tokens: value
23 // and punctuation. The former are variable length, the latter always
24 // one character. A token is terminated if one of the following conditions
25 // occur
26 // - space character found (' ',tab,newline)
27 // - change of token type (value -> punctuation or vv)
28 // - end of fixed-length token (punctuation only)
29 // - start or end of quoted string
30 //
31 // The parser is aware of floating point notation and will assign leading
32 // minus signs, decimal points etc to a value token when this is obvious
33 // from the context. The definition of what is punctuation can be redefined.
34 //
35 
36 
#include "RooFit.h"

#include "Riostream.h"
#include "Riostream.h"
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <string>

#ifndef _WIN32
#include <strings.h>
#endif

#include "RooStreamParser.h"
#include "RooMsgService.h"
#include "RooNumber.h"
51 
52 
53 using namespace std;
54 
56 
57 
58 ////////////////////////////////////////////////////////////////////////////////
59 /// Construct parser on given input stream
60 
62  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
63 {
64 }
65 
66 
67 ////////////////////////////////////////////////////////////////////////////////
68 /// Construct parser on given input stream. Use given errorPrefix to
69 /// prefix any parsing error messages
70 
71 RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) :
72  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
73 {
74 }
75 
76 
77 
78 ////////////////////////////////////////////////////////////////////////////////
79 /// Destructor
80 
82 {
83 }
84 
85 
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// If true, parser is at end of line in stream
89 
91 {
92  Int_t nc(_is->peek()) ;
93  return (nc=='\n'||nc==-1) ;
94 }
95 
96 
97 
98 ////////////////////////////////////////////////////////////////////////////////
99 /// Change list of characters interpreted as punctuation
100 
101 void RooStreamParser::setPunctuation(const TString& punct)
102 {
103  _punct = punct ;
104 }
105 
106 
107 
108 ////////////////////////////////////////////////////////////////////////////////
109 /// Check if given char is considered punctuation
110 
112 {
113  const char* punct = _punct.Data() ;
114  for (int i=0 ; i<_punct.Length() ; i++)
115  if (punct[i] == c) {
116  return kTRUE ;
117  }
118  return kFALSE ;
119 }
120 
121 
122 
123 ////////////////////////////////////////////////////////////////////////////////
124 /// Read one token separated by any of the know punctuation characters
125 /// This function recognizes and handles comment lines in the istream (those
126 /// starting with '#', quoted strings ("") the content of which is not tokenized
127 /// and '+-.' characters that are part of a floating point numbers and are exempt
128 /// from being interpreted as a token separator in case '+-.' are defined as
129 /// token separators.
130 
132 {
133  // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
134  Bool_t first(kTRUE), quotedString(kFALSE), lineCont(kFALSE) ;
135  char buffer[10240], c(0), cnext='\0', cprev=' ' ;
136  Bool_t haveINF(kFALSE) ;
137  Int_t bufptr(0) ;
138 
139  // Check for end of file
140  if (_is->eof() || _is->fail()) {
141  _atEOF = kTRUE ;
142  return TString("") ;
143  }
144 
145  //Ignore leading newline
146  if (_is->peek()=='\n') {
147  _is->get(c) ;
148 
149  // If new line starts with #, zap it
150  while (_is->peek()=='#') {
151  zapToEnd(kFALSE) ;
152  _is->get(c) ; // absorb newline
153  }
154  }
155 
156  while(1) {
157  // Buffer overflow protection
158  if (bufptr>=10239) {
159  oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl ;
160  break ;
161  }
162 
163  // Read next char
164  _is->get(c) ;
165 
166 
167 
168  // Terminate at EOF, EOL or trouble
169  if (_is->eof() || _is->fail() || c=='\n') break ;
170 
171  // Terminate as SPACE, unless we haven't seen any non-SPACE yet
172  if (isspace(c)) {
173  if (first)
174  continue ;
175  else
176  if (!quotedString) {
177  break ;
178  }
179  }
180 
181  // If '-' or '/' see what the next character is
182  if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
183  _is->get(cnext) ;
184 
185 
186  if (cnext=='I' || cnext=='i') {
187  char tmp1,tmp2 ;
188  _is->get(tmp1) ;
189  _is->get(tmp2) ;
190  _is->putback(tmp2) ;
191  _is->putback(tmp1) ;
192  haveINF = ((cnext=='I' && tmp1 == 'N' && tmp2 == 'F') || (cnext=='i' && tmp1 == 'n' && tmp2 == 'f')) ;
193  } else {
194  haveINF = kFALSE ;
195  }
196 
197  _is->putback(cnext) ;
198  }
199 
200 
201  // Check for line continuation marker
202  if (c=='\\' && cnext=='\\') {
203  // Kill rest of line including endline marker
204  zapToEnd(kFALSE) ;
205  _is->get(c) ;
206  lineCont=kTRUE ;
207  break ;
208  }
209 
210  // Stop if begin of comments is encountered
211  if (c=='/' && cnext=='/') {
212  zapToEnd(kFALSE) ;
213  break ;
214  }
215 
216  // Special handling of quoted strings
217  if (c=='"') {
218  if (first) {
219  quotedString=kTRUE ;
220  } else if (!quotedString) {
221  // Terminate current token. Next token will be quoted string
222  _is->putback('"') ;
223  break ;
224  }
225  }
226 
227  if (!quotedString) {
228  // Decide if next char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceding INF)
229  if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev)))
230  && (!first || !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||haveINF)))) {
231 
232  if (first) {
233  // Make this a one-char punctuation token
234  buffer[bufptr++]=c ;
235  break ;
236  } else {
237  // Put back punct. char and terminate current alphanum token
238  _is->putback(c) ;
239  break ;
240  }
241  }
242  } else {
243  // Inside quoted string conventional tokenizing rules do not apply
244 
245  // Terminate token on closing quote
246  if (c=='"' && !first) {
247  buffer[bufptr++]=c ;
248  quotedString=kFALSE ;
249  break ;
250  }
251  }
252 
253  // Store in buffer
254  buffer[bufptr++]=c ;
255  first=kFALSE ;
256  cprev=c ;
257  }
258 
259  if (_is->eof() || _is->bad()) {
260  _atEOF = kTRUE ;
261  }
262 
263  // Check if closing quote was encountered
264  if (quotedString) {
265  oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
266  }
267 
268  // Absorb trailing white space or absorb rest of line if // is encountered
269  if (c=='\n') {
270  if (!lineCont) {
271  _is->putback(c) ;
272  }
273  } else {
274  c = _is->peek() ;
275 
276  while ((isspace(c) || c=='/') && c != '\n') {
277  if (c=='/') {
278  _is->get(c) ;
279  if (_is->peek()=='/') {
280  zapToEnd(kFALSE) ;
281  } else {
282  _is->putback('/') ;
283  }
284  break ;
285  } else {
286  _is->get(c) ;
287  c = _is->peek() ;
288  }
289  }
290  }
291 
292  // If no token was read line is continued, return first token on next line
293  if (bufptr==0 && lineCont) {
294  return readToken() ;
295  }
296 
297  // Zero terminate buffer and convert to TString
298  buffer[bufptr]=0 ;
299  return TString(buffer) ;
300 }
301 
302 
303 
304 ////////////////////////////////////////////////////////////////////////////////
305 /// Read an entire line from the stream and return as TString
306 /// This method recognizes the use of '\\' in the istream
307 /// as line continuation token.
308 
310 {
311  char c,buffer[10240] ;
312  Int_t nfree(10239) ;
313 
314  if (_is->peek()=='\n') _is->get(c) ;
315 
316  // Read till end of line
317  _is->getline(buffer,nfree,'\n') ;
318 
319  // Look for eventual continuation line sequence
320  char *pcontseq = strstr(buffer,"\\\\") ;
321  if (pcontseq) nfree -= (pcontseq-buffer) ;
322  while(pcontseq) {
323  _is->getline(pcontseq,nfree,'\n') ;
324 
325  char* nextpcontseq = strstr(pcontseq,"\\\\") ;
326  if (nextpcontseq) nfree -= (nextpcontseq-pcontseq) ;
327  pcontseq = nextpcontseq ;
328  }
329 
330  // Chop eventual comments
331  char *pcomment = strstr(buffer,"//") ;
332  if (pcomment) *pcomment=0 ;
333 
334  // Chop leading and trailing space
335  char *pstart=buffer ;
336  while (isspace(*pstart)) {
337  pstart++ ;
338  }
339  char *pend=buffer+strlen(buffer)-1 ;
340  if (pend>pstart)
341  while (isspace(*pend)) { *pend--=0 ; }
342 
343  if (_is->eof() || _is->fail()) {
344  _atEOF = kTRUE ;
345  }
346 
347  // Convert to TString
348  return TString(pstart) ;
349 }
350 
351 
352 
353 ////////////////////////////////////////////////////////////////////////////////
354 /// Eat all characters up to and including then end of the
355 /// current line. If inclContLines is kTRUE, all continuation lines
356 /// marked by the '\\' token are zapped as well
357 
358 void RooStreamParser::zapToEnd(Bool_t inclContLines)
359 {
360  // Skip over everything until the end of the current line
361  if (_is->peek()!='\n') {
362 
363  char buffer[10240] ;
364  Int_t nfree(10239) ;
365 
366  // Read till end of line
367  _is->getline(buffer,nfree,'\n') ;
368 
369  if (inclContLines) {
370  // Look for eventual continuation line sequence
371  char *pcontseq = strstr(buffer,"\\\\") ;
372  if (pcontseq) nfree -= (pcontseq-buffer) ;
373  while(pcontseq) {
374  _is->getline(pcontseq,nfree,'\n') ;
375 
376  char* nextpcontseq = strstr(pcontseq,"\\\\") ;
377  if (nextpcontseq) nfree -= (nextpcontseq-pcontseq) ;
378  pcontseq = nextpcontseq ;
379  }
380  }
381 
382  // Put back newline character in stream buffer
383  _is->putback('\n') ;
384  }
385 }
386 
387 
388 
389 ////////////////////////////////////////////////////////////////////////////////
390 /// Read the next token and return kTRUE if it is identical to the given 'expected' token.
391 
392 Bool_t RooStreamParser::expectToken(const TString& expected, Bool_t zapOnError)
393 {
394  TString token(readToken()) ;
395 
396  Bool_t error=token.CompareTo(expected) ;
397  if (error && !_prefix.IsNull()) {
398  oocoutW((TObject*)0,InputArguments) << _prefix << ": parse error, expected '"
399  << expected << "'" << ", got '" << token << "'" << endl ;
400  if (zapOnError) zapToEnd(kTRUE) ;
401  }
402  return error ;
403 }
404 
405 
406 
407 ////////////////////////////////////////////////////////////////////////////////
408 /// Read the next token and convert it to a Double_t. Returns true
409 /// if an error occurred in reading or conversion
410 
412 {
413  TString token(readToken()) ;
414  if (token.IsNull()) return kTRUE ;
415  return convertToDouble(token,value) ;
416 
417 }
418 
419 
420 
421 ////////////////////////////////////////////////////////////////////////////////
422 /// Convert given string to a double. Return true if the conversion fails.
423 
424 Bool_t RooStreamParser::convertToDouble(const TString& token, Double_t& value)
425 {
426  char* endptr = 0;
427  const char* data=token.Data() ;
428 
429  // Handle +/- infinity cases, (token is guaranteed to be >1 char long)
430  if (!strcasecmp(data,"inf") || !strcasecmp(data+1,"inf")) {
431  value = (data[0]=='-') ? -RooNumber::infinity() : RooNumber::infinity() ;
432  return kFALSE ;
433  }
434 
435  value = strtod(data,&endptr) ;
436  Bool_t error = (endptr-data!=token.Length()) ;
437 
438  if (error && !_prefix.IsNull()) {
439  oocoutE((TObject*)0,InputArguments) << _prefix << ": parse error, cannot convert '"
440  << token << "'" << " to double precision" << endl ;
441  }
442  return error ;
443 }
444 
445 
446 
447 ////////////////////////////////////////////////////////////////////////////////
448 /// Read a token and convert it to an Int_t. Returns true
449 /// if an error occurred in reading or conversion
450 
452 {
453  TString token(readToken()) ;
454  if (token.IsNull()) return kTRUE ;
455  return convertToInteger(token,value) ;
456 }
457 
458 
459 
460 ////////////////////////////////////////////////////////////////////////////////
461 /// Convert given string to an Int_t. Returns true if an error
462 /// occurred in conversion
463 
464 Bool_t RooStreamParser::convertToInteger(const TString& token, Int_t& value)
465 {
466  char* endptr = 0;
467  const char* data=token.Data() ;
468  value = strtol(data,&endptr,10) ;
469  Bool_t error = (endptr-data!=token.Length()) ;
470 
471  if (error && !_prefix.IsNull()) {
472  oocoutE((TObject*)0,InputArguments)<< _prefix << ": parse error, cannot convert '"
473  << token << "'" << " to integer" << endl ;
474  }
475  return error ;
476 }
477 
478 
479 
480 ////////////////////////////////////////////////////////////////////////////////
481 /// Read a string token. Returns true if an error occurred in reading
482 /// or conversion. If a the read token is enclosed in quotation
483 /// marks those are stripped in the returned value
484 
485 Bool_t RooStreamParser::readString(TString& value, Bool_t /*zapOnError*/)
486 {
487  TString token(readToken()) ;
488  if (token.IsNull()) return kTRUE ;
489  return convertToString(token,value) ;
490 }
491 
492 
493 
494 ////////////////////////////////////////////////////////////////////////////////
495 /// Convert given token to a string (i.e. remove eventual quotation marks)
496 
497 Bool_t RooStreamParser::convertToString(const TString& token, TString& string)
498 {
499  // Transport to buffer
500  char buffer[10240],*ptr ;
501  strncpy(buffer,token.Data(),10239) ;
502  if (token.Length()>=10239) {
503  oocoutW((TObject*)0,InputArguments) << "RooStreamParser::convertToString: token length exceeds 1023, truncated" << endl ;
504  buffer[10239]=0 ;
505  }
506  int len = strlen(buffer) ;
507 
508  // Remove trailing quote if any
509  if ((len) && (buffer[len-1]=='"'))
510  buffer[len-1]=0 ;
511 
512  // Skip leading quote, if present
513  ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
514 
515  string = ptr ;
516  return kFALSE ;
517 }
Bool_t isPunctChar(char c) const
Check if given char is considered punctuation.
return c
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
STL namespace.
#define oocoutE(o, a)
Definition: RooMsgService.h:48
if on multiple lines(like in C++). **The " * configuration fragment. * * The "import myobject continue
Parses the configuration file.
Definition: HLFactory.cxx:368
Bool_t convertToDouble(const TString &token, Double_t &value)
Convert given string to a double. Return true if the conversion fails.
TString readToken()
Read one token separated by any of the know punctuation characters This function recognizes and handl...
Bool_t convertToInteger(const TString &token, Int_t &value)
Convert given string to an Int_t.
static Double_t infinity()
Return internal infinity representation.
Definition: RooNumber.cxx:49
void zapToEnd(Bool_t inclContLines=kFALSE)
Eat all characters up to and including then end of the current line.
RooStreamParser(std::istream &is)
Construct parser on given input stream.
Bool_t readDouble(Double_t &value, Bool_t zapOnError=kFALSE)
Read the next token and convert it to a Double_t.
TString readLine()
Read an entire line from the stream and return as TString This method recognizes the use of '\' in th...
Bool_t expectToken(const TString &expected, Bool_t zapOnError=kFALSE)
Read the next token and return kTRUE if it differs from the given 'expected' token (i.e. on a parse error).
Bool_t readString(TString &value, Bool_t zapOnError=kFALSE)
Read a string token.
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
Bool_t convertToString(const TString &token, TString &string)
Convert given token to a string (i.e. remove eventual quotation marks)
void setPunctuation(const TString &punct)
Change list of characters interpreted as punctuation.
std::istream * _is
#define oocoutW(o, a)
Definition: RooMsgService.h:47
Mother of all ROOT objects.
Definition: TObject.h:37
virtual ~RooStreamParser()
Destructor.
Bool_t readInteger(Int_t &value, Bool_t zapOnError=kFALSE)
Read a token and convert it to an Int_t.
Definition: first.py:1
const Bool_t kTRUE
Definition: Rtypes.h:91
Bool_t atEOL()
If true, parser is at end of line in stream.