Logo ROOT  
Reference Guide
RooStreamParser.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * @(#)root/roofitcore:$Id$
5 * Authors: *
6 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * *
9 * Copyright (c) 2000-2005, Regents of the University of California *
10 * and Stanford University. All rights reserved. *
11 * *
12 * Redistribution and use in source and binary forms, *
13 * with or without modification, are permitted according to the terms *
14 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15 *****************************************************************************/
16
17//////////////////////////////////////////////////////////////////////////////
18//
19// RooStreamParser is a utility class to parse istreams into tokens and optionally
20// convert them into basic types (double,int,string)
21//
22// The general tokenizing philosophy is that there are two kinds of tokens: value
23// and punctuation. The former are variable length, the latter always
24// one character. A token is terminated if one of the following conditions
25// occurs:
26// - space character found (' ',tab,newline)
27// - change of token type (value -> punctuation or vv)
28// - end of fixed-length token (punctuation only)
29// - start or end of quoted string
30//
31// The parser is aware of floating point notation and will assign leading
32// minus signs, decimal points etc to a value token when this is obvious
33// from the context. The definition of what is punctuation can be redefined.
34//
35
36
37#include "Riostream.h"
38#include <stdlib.h>
39
40#ifndef _WIN32
41#include <strings.h>
42#endif
43
44#include "RooStreamParser.h"
45#include "RooMsgService.h"
46#include "RooNumber.h"
47
48
49using namespace std;
50
52
53
54////////////////////////////////////////////////////////////////////////////////
55/// Construct parser on given input stream
56
58 _is(&is), _atEOL(false), _atEOF(false), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
59{
60}
61
62
63////////////////////////////////////////////////////////////////////////////////
64/// Construct parser on given input stream. Use given errorPrefix to
65/// prefix any parsing error messages
66
67RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) :
68 _is(&is), _atEOL(false), _atEOF(false), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
69{
70}
71
72
73
74////////////////////////////////////////////////////////////////////////////////
75/// Destructor
76
78{
79}
80
81
82
83////////////////////////////////////////////////////////////////////////////////
84/// If true, parser is at end of line in stream
85
87{
88 Int_t nc(_is->peek()) ;
89 return (nc=='\n'||nc==-1) ;
90}
91
92
93
94////////////////////////////////////////////////////////////////////////////////
95/// Change list of characters interpreted as punctuation
96
98{
99 _punct = punct ;
100}
101
102
103
104////////////////////////////////////////////////////////////////////////////////
105/// Check if given char is considered punctuation
106
108{
109 const char* punct = _punct.Data() ;
110 for (int i=0 ; i<_punct.Length() ; i++)
111 if (punct[i] == c) {
112 return true ;
113 }
114 return false ;
115}
116
117
118
119////////////////////////////////////////////////////////////////////////////////
120/// Read one token separated by any of the known punctuation characters.
121/// This function recognizes and handles comment lines in the istream (those
122/// starting with '#'), quoted strings ("") the content of which is not tokenized,
123/// and '+-.' characters that are part of floating point numbers and are exempt
124/// from being interpreted as a token separator in case '+-.' are defined as
125/// token separators.
126
128{
129 // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
130 bool first(true), quotedString(false), lineCont(false) ;
131 char buffer[64000], c(0), cnext = '\0', cprev = ' ';
132 bool haveINF(false) ;
133 Int_t bufptr(0) ;
134
135 // Check for end of file
136 if (_is->eof() || _is->fail()) {
137 _atEOF = true ;
138 return TString("") ;
139 }
140
141 //Ignore leading newline
142 if (_is->peek()=='\n') {
143 _is->get(c) ;
144
145 // If new line starts with #, zap it
146 while (_is->peek()=='#') {
147 zapToEnd(false) ;
148 _is->get(c) ; // absorb newline
149 }
150 }
151
152 while(1) {
153 // Buffer overflow protection
154 if (bufptr >= 63999) {
155 oocoutW(nullptr, InputArguments)
156 << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl;
157 break;
158 }
159
160 // Read next char
161 _is->get(c) ;
162
163
164
165 // Terminate at EOF, EOL or trouble
166 if (_is->eof() || _is->fail() || c=='\n') break ;
167
168 // Terminate as SPACE, unless we haven't seen any non-SPACE yet
169 if (isspace(c)) {
170 if (first)
171 continue ;
172 else
173 if (!quotedString) {
174 break ;
175 }
176 }
177
178 // If '-' or '/' see what the next character is
179 if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
180 _is->get(cnext) ;
181
182
183 if (cnext=='I' || cnext=='i') {
184 char tmp1,tmp2 ;
185 _is->get(tmp1) ;
186 _is->get(tmp2) ;
187 _is->putback(tmp2) ;
188 _is->putback(tmp1) ;
189 haveINF = ((cnext=='I' && tmp1 == 'N' && tmp2 == 'F') || (cnext=='i' && tmp1 == 'n' && tmp2 == 'f')) ;
190 } else {
191 haveINF = false ;
192 }
193
194 _is->putback(cnext) ;
195 }
196
197
198 // Check for line continuation marker
199 if (c=='\\' && cnext=='\\') {
200 // Kill rest of line including endline marker
201 zapToEnd(false) ;
202 _is->get(c) ;
203 lineCont=true ;
204 break ;
205 }
206
207 // Stop if begin of comments is encountered
208 if (c=='/' && cnext=='/') {
209 zapToEnd(false) ;
210 break ;
211 }
212
213 // Special handling of quoted strings
214 if (c=='"') {
215 if (first) {
216 quotedString=true ;
217 } else if (!quotedString) {
218 // Terminate current token. Next token will be quoted string
219 _is->putback('"') ;
220 break ;
221 }
222 }
223
224 if (!quotedString) {
225 // Decide if next char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceding INF)
226 if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev)))
227 && !((c=='-'||c=='+') && isdigit(cnext) && (cprev == 'e' || cprev == 'E'))
228 && (!first || !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||haveINF)))) {
229
230 if (first) {
231 // Make this a one-char punctuation token
232 buffer[bufptr++]=c ;
233 break ;
234 } else {
235 // Put back punct. char and terminate current alphanum token
236 _is->putback(c) ;
237 break ;
238 }
239 }
240 } else {
241 // Inside quoted string conventional tokenizing rules do not apply
242
243 // Terminate token on closing quote
244 if (c=='"' && !first) {
245 buffer[bufptr++]=c ;
246 quotedString=false ;
247 break ;
248 }
249 }
250
251 // Store in buffer
252 buffer[bufptr++]=c ;
253 first=false ;
254 cprev=c ;
255 }
256
257 if (_is->eof() || _is->bad()) {
258 _atEOF = true ;
259 }
260
261 // Check if closing quote was encountered
262 if (quotedString) {
263 oocoutW(nullptr,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
264 }
265
266 // Absorb trailing white space or absorb rest of line if // is encountered
267 if (c=='\n') {
268 if (!lineCont) {
269 _is->putback(c) ;
270 }
271 } else {
272 c = _is->peek() ;
273
274 while ((isspace(c) || c=='/') && c != '\n') {
275 if (c=='/') {
276 _is->get(c) ;
277 if (_is->peek()=='/') {
278 zapToEnd(false) ;
279 } else {
280 _is->putback('/') ;
281 }
282 break ;
283 } else {
284 _is->get(c) ;
285 c = _is->peek() ;
286 }
287 }
288 }
289
290 // If no token was read line is continued, return first token on next line
291 if (bufptr==0 && lineCont) {
292 return readToken() ;
293 }
294
295 // Zero terminate buffer and convert to TString
296 buffer[bufptr]=0 ;
297 return TString(buffer) ;
298}
299
300
301
302////////////////////////////////////////////////////////////////////////////////
303/// Read an entire line from the stream and return as TString
304/// This method recognizes the use of '\\' in the istream
305/// as line continuation token.
306
308{
309 char c, buffer[64000];
310 Int_t nfree(63999);
311
312 if (_is->peek() == '\n')
313 _is->get(c);
314
315 // Read till end of line
316 _is->getline(buffer, nfree, '\n');
317
318 // Look for eventual continuation line sequence
319 char *pcontseq = strstr(buffer, "\\\\");
320 if (pcontseq)
321 nfree -= (pcontseq - buffer);
322 while (pcontseq) {
323 _is->getline(pcontseq, nfree, '\n');
324
325 char *nextpcontseq = strstr(pcontseq, "\\\\");
326 if (nextpcontseq)
327 nfree -= (nextpcontseq - pcontseq);
328 pcontseq = nextpcontseq;
329 }
330
331 // Chop eventual comments
332 char *pcomment = strstr(buffer,"//") ;
333 if (pcomment) *pcomment=0 ;
334
335 // Chop leading and trailing space
336 char *pstart=buffer ;
337 while (isspace(*pstart)) {
338 pstart++ ;
339 }
340 char *pend=buffer+strlen(buffer)-1 ;
341 if (pend>pstart)
342 while (isspace(*pend)) { *pend--=0 ; }
343
344 if (_is->eof() || _is->fail()) {
345 _atEOF = true ;
346 }
347
348 // Convert to TString
349 return TString(pstart) ;
350}
351
352
353
354////////////////////////////////////////////////////////////////////////////////
355/// Eat all characters up to and including then end of the
356/// current line. If inclContLines is true, all continuation lines
357/// marked by the '\\' token are zapped as well
358
359void RooStreamParser::zapToEnd(bool inclContLines)
360{
361 // Skip over everything until the end of the current line
362 if (_is->peek()!='\n') {
363
364 char buffer[64000];
365 Int_t nfree(63999);
366
367 // Read till end of line
368 _is->getline(buffer, nfree, '\n');
369
370 if (inclContLines) {
371 // Look for eventual continuation line sequence
372 char *pcontseq = strstr(buffer, "\\\\");
373 if (pcontseq)
374 nfree -= (pcontseq - buffer);
375 while (pcontseq) {
376 _is->getline(pcontseq, nfree, '\n');
377
378 char *nextpcontseq = strstr(pcontseq, "\\\\");
379 if (nextpcontseq)
380 nfree -= (nextpcontseq - pcontseq);
381 pcontseq = nextpcontseq;
382 }
383 }
384
385 // Put back newline character in stream buffer
386 _is->putback('\n') ;
387 }
388}
389
390
391
392////////////////////////////////////////////////////////////////////////////////
393/// Read the next token and return true if it is identical to the given 'expected' token.
394
395bool RooStreamParser::expectToken(const TString& expected, bool zapOnError)
396{
397 TString token(readToken()) ;
398
399 bool error=token.CompareTo(expected) ;
400 if (error && !_prefix.IsNull()) {
401 oocoutW(nullptr,InputArguments) << _prefix << ": parse error, expected '"
402 << expected << "'" << ", got '" << token << "'" << endl ;
403 if (zapOnError) zapToEnd(true) ;
404 }
405 return error ;
406}
407
408
409
410////////////////////////////////////////////////////////////////////////////////
411/// Read the next token and convert it to a double. Returns true
412/// if an error occurred in reading or conversion
413
414bool RooStreamParser::readDouble(double& value, bool /*zapOnError*/)
415{
416 TString token(readToken()) ;
417 if (token.IsNull()) return true ;
418 return convertToDouble(token,value) ;
419
420}
421
422
423
424////////////////////////////////////////////////////////////////////////////////
425/// Convert given string to a double. Return true if the conversion fails.
426
428{
429 char* endptr = 0;
430 const char* data=token.Data() ;
431
432 // Handle +/- infinity cases, (token is guaranteed to be >1 char long)
433 if (!strcasecmp(data,"inf") || !strcasecmp(data+1,"inf")) {
435 return false ;
436 }
437
438 value = strtod(data,&endptr) ;
439 bool error = (endptr-data!=token.Length()) ;
440
441 if (error && !_prefix.IsNull()) {
442 oocoutE(nullptr,InputArguments) << _prefix << ": parse error, cannot convert '"
443 << token << "'" << " to double precision" << endl ;
444 }
445 return error ;
446}
447
448
449
450////////////////////////////////////////////////////////////////////////////////
451/// Read a token and convert it to an Int_t. Returns true
452/// if an error occurred in reading or conversion
453
454bool RooStreamParser::readInteger(Int_t& value, bool /*zapOnError*/)
455{
456 TString token(readToken()) ;
457 if (token.IsNull()) return true ;
458 return convertToInteger(token,value) ;
459}
460
461
462
463////////////////////////////////////////////////////////////////////////////////
464/// Convert given string to an Int_t. Returns true if an error
465/// occurred in conversion
466
468{
469 char* endptr = 0;
470 const char* data=token.Data() ;
471 value = strtol(data,&endptr,10) ;
472 bool error = (endptr-data!=token.Length()) ;
473
474 if (error && !_prefix.IsNull()) {
475 oocoutE(nullptr,InputArguments)<< _prefix << ": parse error, cannot convert '"
476 << token << "'" << " to integer" << endl ;
477 }
478 return error ;
479}
480
481
482
483////////////////////////////////////////////////////////////////////////////////
484/// Read a string token. Returns true if an error occurred in reading
485/// or conversion. If a the read token is enclosed in quotation
486/// marks those are stripped in the returned value
487
488bool RooStreamParser::readString(TString& value, bool /*zapOnError*/)
489{
490 TString token(readToken()) ;
491 if (token.IsNull()) return true ;
492 return convertToString(token,value) ;
493}
494
495
496
497////////////////////////////////////////////////////////////////////////////////
498/// Convert given token to a string (i.e. remove eventual quotation marks)
499
501{
502 // Transport to buffer
503 char buffer[64000], *ptr;
504 strncpy(buffer, token.Data(), 63999);
505 if (token.Length() >= 63999) {
506 oocoutW(nullptr, InputArguments) << "RooStreamParser::convertToString: token length exceeds 63999, truncated"
507 << endl;
508 buffer[63999] = 0;
509 }
510 int len = strlen(buffer) ;
511
512 // Remove trailing quote if any
513 if ((len) && (buffer[len-1]=='"'))
514 buffer[len-1]=0 ;
515
516 // Skip leading quote, if present
517 ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
518
519 string = ptr ;
520 return false ;
521}
#define c(i)
Definition: RSha256.hxx:101
#define oocoutW(o, a)
Definition: RooMsgService.h:51
#define oocoutE(o, a)
Definition: RooMsgService.h:52
#define ClassImp(name)
Definition: Rtypes.h:375
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
static double infinity()
Return internal infinity representation.
Definition: RooNumber.cxx:48
bool convertToInteger(const TString &token, Int_t &value)
Convert given string to an Int_t.
bool readString(TString &value, bool zapOnError=false)
Read a string token.
void setPunctuation(const TString &punct)
Change list of characters interpreted as punctuation.
bool isPunctChar(char c) const
Check if given char is considered punctuation.
virtual ~RooStreamParser()
Destructor.
bool expectToken(const TString &expected, bool zapOnError=false)
Read the next token and return true if it differs from the given 'expected' token (i.e. true signals a parse error).
bool convertToDouble(const TString &token, double &value)
Convert given string to a double. Return true if the conversion fails.
bool atEOL()
If true, parser is at end of line in stream.
std::istream * _is
TString readLine()
Read an entire line from the stream and return as TString This method recognizes the use of '\' in th...
bool readDouble(double &value, bool zapOnError=false)
Read the next token and convert it to a double.
TString readToken()
Read one token separated by any of the known punctuation characters. This function recognizes and handl...
bool readInteger(Int_t &value, bool zapOnError=false)
Read a token and convert it to an Int_t.
void zapToEnd(bool inclContLines=false)
Eat all characters up to the end of the current line (the terminating newline is left in the stream).
bool convertToString(const TString &token, TString &string)
Convert given token to a string (i.e. remove eventual quotation marks)
RooStreamParser(std::istream &is)
Construct parser on given input stream.
Basic string class.
Definition: TString.h:136
Ssiz_t Length() const
Definition: TString.h:410
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:442
const char * Data() const
Definition: TString.h:369
Bool_t IsNull() const
Definition: TString.h:407
@ InputArguments
Definition: RooGlobalFunc.h:64
Definition: first.py:1