Logo ROOT  
Reference Guide
RooStreamParser.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * @(#)root/roofitcore:$Id$
5 * Authors: *
6 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * *
9 * Copyright (c) 2000-2005, Regents of the University of California *
10 * and Stanford University. All rights reserved. *
11 * *
12 * Redistribution and use in source and binary forms, *
13 * with or without modification, are permitted according to the terms *
14 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15 *****************************************************************************/
16
17//////////////////////////////////////////////////////////////////////////////
18//
19// RooStreamParser is a utility class to parse istreams into tokens and optionally
20// convert them into basic types (double,int,string)
21//
22// The general tokenizing philosophy is that there are two kinds of tokens: value
23// and punctuation. The former are variable length, the latter always
24// one character. A token is terminated if one of the following conditions
25// occurs:
26// - space character found (' ',tab,newline)
27// - change of token type (value -> punctuation or vv)
28// - end of fixed-length token (punctuation only)
29// - start or end of quoted string
30//
31// The parser is aware of floating point notation and will assign leading
32// minus signs, decimal points etc to a value token when this is obvious
33// from the context. The definition of what is punctuation can be redefined.
34//
35
36
37#include "RooFit.h"
38
39#include "Riostream.h"
40#include <stdlib.h>
41
42#ifndef _WIN32
43#include <strings.h>
44#endif
45
46#include "RooStreamParser.h"
47#include "RooMsgService.h"
48#include "RooNumber.h"
49
50
51using namespace std;
52
54
55
56////////////////////////////////////////////////////////////////////////////////
57/// Construct parser on given input stream
58
60 _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
61{
62}
63
64
65////////////////////////////////////////////////////////////////////////////////
66/// Construct parser on given input stream. Use given errorPrefix to
67/// prefix any parsing error messages
68
69RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) :
70 _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
71{
72}
73
74
75
76////////////////////////////////////////////////////////////////////////////////
77/// Destructor
78
80{
81}
82
83
84
85////////////////////////////////////////////////////////////////////////////////
86/// If true, parser is at end of line in stream
87
89{
90 Int_t nc(_is->peek()) ;
91 return (nc=='\n'||nc==-1) ;
92}
93
94
95
96////////////////////////////////////////////////////////////////////////////////
97/// Change list of characters interpreted as punctuation
98
100{
101 _punct = punct ;
102}
103
104
105
106////////////////////////////////////////////////////////////////////////////////
107/// Check if given char is considered punctuation
108
110{
111 const char* punct = _punct.Data() ;
112 for (int i=0 ; i<_punct.Length() ; i++)
113 if (punct[i] == c) {
114 return kTRUE ;
115 }
116 return kFALSE ;
117}
118
119
120
121////////////////////////////////////////////////////////////////////////////////
122/// Read one token separated by any of the known punctuation characters.
123/// This function recognizes and handles comment lines in the istream (those
124/// starting with '#'), quoted strings ("") the content of which is not tokenized,
125/// and '+-.' characters that are part of a floating point number and are exempt
126/// from being interpreted as a token separator in case '+-.' are defined as
127/// token separators.
///
/// The token is returned as a TString. An empty string is returned when the
/// stream is already at end-of-file or in a failed state on entry. _atEOF is
/// set when the end of the stream is seen.
///
/// NOTE(review): listing lines 129, 149, 157, 203, 211 and 280 are absent from
/// this copy (the embedded numbering skips them), so the function header and a
/// few statements are not visible here. Restore them from the upstream file
/// before compiling.
128
130{
131 // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
132 Bool_t first(kTRUE), quotedString(kFALSE), lineCont(kFALSE) ;
 // buffer holds at most 63999 token characters plus the terminating zero;
 // cnext is the one-character lookahead, cprev the last character stored
133 char buffer[64000], c(0), cnext = '\0', cprev = ' ';
134 Bool_t haveINF(kFALSE) ;
135 Int_t bufptr(0) ;
136
137 // Check for end of file
138 if (_is->eof() || _is->fail()) {
139 _atEOF = kTRUE ;
140 return TString("") ;
141 }
142
143 // Ignore leading newline
144 if (_is->peek()=='\n') {
145 _is->get(c) ;
146
147 // If new line starts with #, zap it
148 while (_is->peek()=='#') {
 // NOTE(review): listing line 149 is missing here; presumably it discards
 // the rest of the '#' line - confirm against the upstream file
150 _is->get(c) ; // absorb newline
151 }
152 }
153
154 while(1) {
155 // Buffer overflow protection
156 if (bufptr >= 63999) {
 // NOTE(review): listing line 157 (the start of this warning statement) is missing
158 << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl;
159 break;
160 }
161
162 // Read next char
163 _is->get(c) ;
164
165
166
167 // Terminate at EOF, EOL or trouble
168 if (_is->eof() || _is->fail() || c=='\n') break ;
169
170 // Terminate at SPACE, unless we haven't seen any non-SPACE yet.
 // Inside a quoted string the space falls through and is stored like any other character
171 if (isspace(c)) {
172 if (first)
173 continue ;
174 else
175 if (!quotedString) {
176 break ;
177 }
178 }
179
180 // For '.', '-', '+', '/' or a backslash, look at the next character (it is read and put back below)
181 if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
182 _is->get(cnext) ;
183
184
 // If the next character is 'I' or 'i', look two characters further to detect "INF" / "inf"
 // (only the all-uppercase and all-lowercase spellings are matched)
185 if (cnext=='I' || cnext=='i') {
186 char tmp1,tmp2 ;
187 _is->get(tmp1) ;
188 _is->get(tmp2) ;
189 _is->putback(tmp2) ;
190 _is->putback(tmp1) ;
191 haveINF = ((cnext=='I' && tmp1 == 'N' && tmp2 == 'F') || (cnext=='i' && tmp1 == 'n' && tmp2 == 'f')) ;
192 } else {
193 haveINF = kFALSE ;
194 }
195
196 _is->putback(cnext) ;
197 }
198
199
200 // Check for line continuation marker (two consecutive backslashes)
201 if (c=='\\' && cnext=='\\') {
202 // Kill rest of line including endline marker
 // NOTE(review): listing line 203 is missing here; presumably it discards the rest of the line - confirm upstream
204 _is->get(c) ;
205 lineCont=kTRUE ;
206 break ;
207 }
208
209 // Stop if begin of comments ("//") is encountered
210 if (c=='/' && cnext=='/') {
 // NOTE(review): listing line 211 is missing here; presumably it discards the rest of the line - confirm upstream
212 break ;
213 }
214
215 // Special handling of quoted strings
216 if (c=='"') {
217 if (first) {
218 quotedString=kTRUE ;
219 } else if (!quotedString) {
220 // Terminate current token. Next token will be quoted string
221 _is->putback('"') ;
222 break ;
223 }
224 }
225
226 if (!quotedString) {
227 // Decide if this char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceding INF)
 // Exemptions: '.' next to a digit; a sign followed by a digit right after 'e'/'E';
 // a sign at the start of a token followed by a digit, '.' or INF
228 if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev)))
229 && !((c=='-'||c=='+') && isdigit(cnext) && (cprev=='e' || cprev == 'E'))
230 && (!first || !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||haveINF)))) {
231
232 if (first) {
233 // Make this a one-char punctuation token
234 buffer[bufptr++]=c ;
235 break ;
236 } else {
237 // Put back punct. char and terminate current alphanum token
238 _is->putback(c) ;
239 break ;
240 }
241 }
242 } else {
243 // Inside quoted string conventional tokenizing rules do not apply
244
245 // Terminate token on closing quote (the closing quote is kept in the token)
246 if (c=='"' && !first) {
247 buffer[bufptr++]=c ;
248 quotedString=kFALSE ;
249 break ;
250 }
251 }
252
253 // Store in buffer
254 buffer[bufptr++]=c ;
255 first=kFALSE ;
256 cprev=c ;
257 }
258
259 if (_is->eof() || _is->bad()) {
260 _atEOF = kTRUE ;
261 }
262
263 // Check if closing quote was encountered (quotedString is still set if the loop ended inside a quoted string)
264 if (quotedString) {
265 oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
266 }
267
268 // Absorb trailing white space or absorb rest of line if // is encountered
269 if (c=='\n') {
 // Leave the newline in the stream unless it ended a continued line
270 if (!lineCont) {
271 _is->putback(c) ;
272 }
273 } else {
274 c = _is->peek() ;
275
276 while ((isspace(c) || c=='/') && c != '\n') {
277 if (c=='/') {
278 _is->get(c) ;
279 if (_is->peek()=='/') {
 // NOTE(review): listing line 280 is missing here; presumably it discards the rest of the line - confirm upstream
281 } else {
 // A single '/' is not a comment start: put it back for the next token
282 _is->putback('/') ;
283 }
284 break ;
285 } else {
286 _is->get(c) ;
287 c = _is->peek() ;
288 }
289 }
290 }
291
292 // If no token was read and the line is continued, return first token on next line
293 if (bufptr==0 && lineCont) {
294 return readToken() ;
295 }
296
297 // Zero terminate buffer and convert to TString
298 buffer[bufptr]=0 ;
299 return TString(buffer) ;
300}
301
302
303
304////////////////////////////////////////////////////////////////////////////////
305/// Read an entire line from the stream and return as TString
306/// This method recognizes the use of '\\' in the istream
307/// as line continuation token.
308
310{
311 char c, buffer[64000];
312 Int_t nfree(63999);
313
314 if (_is->peek() == '\n')
315 _is->get(c);
316
317 // Read till end of line
318 _is->getline(buffer, nfree, '\n');
319
320 // Look for eventual continuation line sequence
321 char *pcontseq = strstr(buffer, "\\\\");
322 if (pcontseq)
323 nfree -= (pcontseq - buffer);
324 while (pcontseq) {
325 _is->getline(pcontseq, nfree, '\n');
326
327 char *nextpcontseq = strstr(pcontseq, "\\\\");
328 if (nextpcontseq)
329 nfree -= (nextpcontseq - pcontseq);
330 pcontseq = nextpcontseq;
331 }
332
333 // Chop eventual comments
334 char *pcomment = strstr(buffer,"//") ;
335 if (pcomment) *pcomment=0 ;
336
337 // Chop leading and trailing space
338 char *pstart=buffer ;
339 while (isspace(*pstart)) {
340 pstart++ ;
341 }
342 char *pend=buffer+strlen(buffer)-1 ;
343 if (pend>pstart)
344 while (isspace(*pend)) { *pend--=0 ; }
345
346 if (_is->eof() || _is->fail()) {
347 _atEOF = kTRUE ;
348 }
349
350 // Convert to TString
351 return TString(pstart) ;
352}
353
354
355
356////////////////////////////////////////////////////////////////////////////////
357/// Eat all characters up to and including then end of the
358/// current line. If inclContLines is kTRUE, all continuation lines
359/// marked by the '\\' token are zapped as well
360
362{
363 // Skip over everything until the end of the current line
364 if (_is->peek()!='\n') {
365
366 char buffer[64000];
367 Int_t nfree(63999);
368
369 // Read till end of line
370 _is->getline(buffer, nfree, '\n');
371
372 if (inclContLines) {
373 // Look for eventual continuation line sequence
374 char *pcontseq = strstr(buffer, "\\\\");
375 if (pcontseq)
376 nfree -= (pcontseq - buffer);
377 while (pcontseq) {
378 _is->getline(pcontseq, nfree, '\n');
379
380 char *nextpcontseq = strstr(pcontseq, "\\\\");
381 if (nextpcontseq)
382 nfree -= (nextpcontseq - pcontseq);
383 pcontseq = nextpcontseq;
384 }
385 }
386
387 // Put back newline character in stream buffer
388 _is->putback('\n') ;
389 }
390}
391
392
393
394////////////////////////////////////////////////////////////////////////////////
395/// Read the next token and return kTRUE if it is identical to the given 'expected' token.
396
398{
399 TString token(readToken()) ;
400
401 Bool_t error=token.CompareTo(expected) ;
402 if (error && !_prefix.IsNull()) {
403 oocoutW((TObject*)0,InputArguments) << _prefix << ": parse error, expected '"
404 << expected << "'" << ", got '" << token << "'" << endl ;
405 if (zapOnError) zapToEnd(kTRUE) ;
406 }
407 return error ;
408}
409
410
411
412////////////////////////////////////////////////////////////////////////////////
413/// Read the next token and convert it to a Double_t. Returns true
414/// if an error occurred in reading or conversion
415
417{
418 TString token(readToken()) ;
419 if (token.IsNull()) return kTRUE ;
420 return convertToDouble(token,value) ;
421
422}
423
424
425
426////////////////////////////////////////////////////////////////////////////////
427/// Convert given string to a double. Return true if the conversion fails.
428
430{
431 char* endptr = 0;
432 const char* data=token.Data() ;
433
434 // Handle +/- infinity cases, (token is guaranteed to be >1 char long)
435 if (!strcasecmp(data,"inf") || !strcasecmp(data+1,"inf")) {
436 value = (data[0]=='-') ? -RooNumber::infinity() : RooNumber::infinity() ;
437 return kFALSE ;
438 }
439
440 value = strtod(data,&endptr) ;
441 Bool_t error = (endptr-data!=token.Length()) ;
442
443 if (error && !_prefix.IsNull()) {
444 oocoutE((TObject*)0,InputArguments) << _prefix << ": parse error, cannot convert '"
445 << token << "'" << " to double precision" << endl ;
446 }
447 return error ;
448}
449
450
451
452////////////////////////////////////////////////////////////////////////////////
453/// Read a token and convert it to an Int_t. Returns true
454/// if an error occurred in reading or conversion
455
457{
458 TString token(readToken()) ;
459 if (token.IsNull()) return kTRUE ;
460 return convertToInteger(token,value) ;
461}
462
463
464
465////////////////////////////////////////////////////////////////////////////////
466/// Convert given string to an Int_t. Returns true if an error
467/// occurred in conversion
468
470{
471 char* endptr = 0;
472 const char* data=token.Data() ;
473 value = strtol(data,&endptr,10) ;
474 Bool_t error = (endptr-data!=token.Length()) ;
475
476 if (error && !_prefix.IsNull()) {
477 oocoutE((TObject*)0,InputArguments)<< _prefix << ": parse error, cannot convert '"
478 << token << "'" << " to integer" << endl ;
479 }
480 return error ;
481}
482
483
484
485////////////////////////////////////////////////////////////////////////////////
486/// Read a string token. Returns true if an error occurred in reading
487/// or conversion. If a the read token is enclosed in quotation
488/// marks those are stripped in the returned value
489
491{
492 TString token(readToken()) ;
493 if (token.IsNull()) return kTRUE ;
494 return convertToString(token,value) ;
495}
496
497
498
499////////////////////////////////////////////////////////////////////////////////
500/// Convert given token to a string (i.e. remove eventual quotation marks)
501
503{
504 // Transport to buffer
505 char buffer[64000], *ptr;
506 strncpy(buffer, token.Data(), 63999);
507 if (token.Length() >= 63999) {
508 oocoutW((TObject *)0, InputArguments) << "RooStreamParser::convertToString: token length exceeds 63999, truncated"
509 << endl;
510 buffer[63999] = 0;
511 }
512 int len = strlen(buffer) ;
513
514 // Remove trailing quote if any
515 if ((len) && (buffer[len-1]=='"'))
516 buffer[len-1]=0 ;
517
518 // Skip leading quote, if present
519 ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
520
521 string = ptr ;
522 return kFALSE ;
523}
#define c(i)
Definition: RSha256.hxx:101
#define oocoutW(o, a)
Definition: RooMsgService.h:47
#define oocoutE(o, a)
Definition: RooMsgService.h:48
int Int_t
Definition: RtypesCore.h:43
const Bool_t kFALSE
Definition: RtypesCore.h:90
bool Bool_t
Definition: RtypesCore.h:61
double Double_t
Definition: RtypesCore.h:57
const Bool_t kTRUE
Definition: RtypesCore.h:89
#define ClassImp(name)
Definition: Rtypes.h:361
static Double_t infinity()
Return internal infinity representation.
Definition: RooNumber.cxx:49
Bool_t isPunctChar(char c) const
Check if given char is considered punctuation.
Bool_t atEOL()
If true, parser is at end of line in stream.
Bool_t readInteger(Int_t &value, Bool_t zapOnError=kFALSE)
Read a token and convert it to an Int_t.
Bool_t readDouble(Double_t &value, Bool_t zapOnError=kFALSE)
Read the next token and convert it to a Double_t.
Bool_t convertToDouble(const TString &token, Double_t &value)
Convert given string to a double. Return true if the conversion fails.
Bool_t readString(TString &value, Bool_t zapOnError=kFALSE)
Read a string token.
void setPunctuation(const TString &punct)
Change list of characters interpreted as punctuation.
void zapToEnd(Bool_t inclContLines=kFALSE)
Eat all characters up to the end of the current line.
Bool_t expectToken(const TString &expected, Bool_t zapOnError=kFALSE)
Read the next token and return kTRUE if it differs from the given 'expected' token (kFALSE if it is identical).
virtual ~RooStreamParser()
Destructor.
std::istream * _is
Bool_t convertToInteger(const TString &token, Int_t &value)
Convert given string to an Int_t.
TString readLine()
Read an entire line from the stream and return as TString This method recognizes the use of '\' in th...
TString readToken()
Read one token separated by any of the known punctuation characters This function recognizes and handl...
Bool_t convertToString(const TString &token, TString &string)
Convert given token to a string (i.e. remove eventual quotation marks)
RooStreamParser(std::istream &is)
Construct parser on given input stream.
Mother of all ROOT objects.
Definition: TObject.h:37
Basic string class.
Definition: TString.h:131
Ssiz_t Length() const
Definition: TString.h:405
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:418
const char * Data() const
Definition: TString.h:364
Bool_t IsNull() const
Definition: TString.h:402
@ InputArguments
Definition: RooGlobalFunc.h:68
Definition: first.py:1