Logo ROOT  
Reference Guide
TGHtmlParse.cxx
Go to the documentation of this file.
1 // $Id: TGHtmlParse.cxx,v 1.1 2007/05/04 17:07:01 brun Exp $
2 // Author: Valeriy Onuchin 03/05/2007
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2001, Rene Brun, Fons Rademakers and Reiner Rohlfs *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /**************************************************************************
13 
14  HTML widget for xclass. Based on tkhtml 1.28
15  Copyright (C) 1997-2000 D. Richard Hipp <drh@acm.org>
16  Copyright (C) 2002-2003 Hector Peraza.
17 
18  This library is free software; you can redistribute it and/or
19  modify it under the terms of the GNU Library General Public
20  License as published by the Free Software Foundation; either
21  version 2 of the License, or (at your option) any later version.
22 
23  This library is distributed in the hope that it will be useful,
24  but WITHOUT ANY WARRANTY; without even the implied warranty of
25  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26  Library General Public License for more details.
27 
28  You should have received a copy of the GNU Library General Public
29  License along with this library; if not, write to the Free
30  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
31 
32 **************************************************************************/
33 
34 // A tokenizer that converts raw HTML into a linked list of HTML elements.
35 
36 #include <cstring>
37 #include <cstdlib>
38 #include <cstdio>
39 #include <cctype>
40 
41 #include "TGHtml.h"
42 #include "TGHtmlTokens.h"
43 #include "strlcpy.h"
44 #include "snprintf.h"
45 
46 //----------------------------------------------------------------------
47 
49 
50 
51 /****************** Begin Escape Sequence Translator *************/
52 
53 // The next section of code implements routines used to translate
54 // the '&' escape sequences of SGML to individual characters.
55 // Examples:
56 //
57 // &amp; &
58 // &lt; <
59 // &gt; >
60 // &nbsp; nonbreakable space
61 //
62 
// One record of the escape sequence table.
//
// The table that follows initializes these records positionally
// (name, value, link), so the order of the members matters.

struct SgEsc_t {
   const char     *fZName;     // Name of the sequence without '&' and ';'. ex: "amp"
   char            fValue[8];  // NUL-terminated replacement text. ex: "&"
   struct SgEsc_t *fPNext;     // Next record whose name has the same hash
};
71 
72 // The following is a table of all escape sequences. Add new sequences
73 // by adding entries to this table.
74 
75 static struct SgEsc_t gEscSequences[] = {
76  { "quot", "\"", 0 },
77  { "amp", "&", 0 },
78  { "lt", "<", 0 },
79  { "gt", ">", 0 },
80  { "nbsp", " ", 0 },
81  { "iexcl", "\241", 0 },
82  { "cent", "\242", 0 },
83  { "pound", "\243", 0 },
84  { "curren", "\244", 0 },
85  { "yen", "\245", 0 },
86  { "brvbar", "\246", 0 },
87  { "sect", "\247", 0 },
88  { "uml", "\250", 0 },
89  { "copy", "\251", 0 },
90  { "ordf", "\252", 0 },
91  { "laquo", "\253", 0 },
92  { "not", "\254", 0 },
93  { "shy", "\255", 0 },
94  { "reg", "\256", 0 },
95  { "macr", "\257", 0 },
96  { "deg", "\260", 0 },
97  { "plusmn", "\261", 0 },
98  { "sup2", "\262", 0 },
99  { "sup3", "\263", 0 },
100  { "acute", "\264", 0 },
101  { "micro", "\265", 0 },
102  { "para", "\266", 0 },
103  { "middot", "\267", 0 },
104  { "cedil", "\270", 0 },
105  { "sup1", "\271", 0 },
106  { "ordm", "\272", 0 },
107  { "raquo", "\273", 0 },
108  { "frac14", "\274", 0 },
109  { "frac12", "\275", 0 },
110  { "frac34", "\276", 0 },
111  { "iquest", "\277", 0 },
112  { "Agrave", "\300", 0 },
113  { "Aacute", "\301", 0 },
114  { "Acirc", "\302", 0 },
115  { "Atilde", "\303", 0 },
116  { "Auml", "\304", 0 },
117  { "Aring", "\305", 0 },
118  { "AElig", "\306", 0 },
119  { "Ccedil", "\307", 0 },
120  { "Egrave", "\310", 0 },
121  { "Eacute", "\311", 0 },
122  { "Ecirc", "\312", 0 },
123  { "Euml", "\313", 0 },
124  { "Igrave", "\314", 0 },
125  { "Iacute", "\315", 0 },
126  { "Icirc", "\316", 0 },
127  { "Iuml", "\317", 0 },
128  { "ETH", "\320", 0 },
129  { "Ntilde", "\321", 0 },
130  { "Ograve", "\322", 0 },
131  { "Oacute", "\323", 0 },
132  { "Ocirc", "\324", 0 },
133  { "Otilde", "\325", 0 },
134  { "Ouml", "\326", 0 },
135  { "times", "\327", 0 },
136  { "Oslash", "\330", 0 },
137  { "Ugrave", "\331", 0 },
138  { "Uacute", "\332", 0 },
139  { "Ucirc", "\333", 0 },
140  { "Uuml", "\334", 0 },
141  { "Yacute", "\335", 0 },
142  { "THORN", "\336", 0 },
143  { "szlig", "\337", 0 },
144  { "agrave", "\340", 0 },
145  { "aacute", "\341", 0 },
146  { "acirc", "\342", 0 },
147  { "atilde", "\343", 0 },
148  { "auml", "\344", 0 },
149  { "aring", "\345", 0 },
150  { "aelig", "\346", 0 },
151  { "ccedil", "\347", 0 },
152  { "egrave", "\350", 0 },
153  { "eacute", "\351", 0 },
154  { "ecirc", "\352", 0 },
155  { "euml", "\353", 0 },
156  { "igrave", "\354", 0 },
157  { "iacute", "\355", 0 },
158  { "icirc", "\356", 0 },
159  { "iuml", "\357", 0 },
160  { "eth", "\360", 0 },
161  { "ntilde", "\361", 0 },
162  { "ograve", "\362", 0 },
163  { "oacute", "\363", 0 },
164  { "ocirc", "\364", 0 },
165  { "otilde", "\365", 0 },
166  { "ouml", "\366", 0 },
167  { "divide", "\367", 0 },
168  { "oslash", "\370", 0 },
169  { "ugrave", "\371", 0 },
170  { "uacute", "\372", 0 },
171  { "ucirc", "\373", 0 },
172  { "uuml", "\374", 0 },
173  { "yacute", "\375", 0 },
174  { "thorn", "\376", 0 },
175  { "yuml", "\377", 0 },
176 };
177 
178 
179 // The size of the handler hash table. For best results this should
180 // be a prime number which is about the same size as the number of
181 // escape sequences known to the system.
182 
183 #define ESC_HASH_SIZE (sizeof(gEscSequences)/sizeof(gEscSequences[0])+7)
184 
185 
186 // The hash table
187 //
188 // If the name of an escape sequence hashes to the value H, then
189 // gApEscHash[H] will point to a linked list of Esc structures, one of
190 // which will be the Esc structure for that escape sequence.
191 
192 static struct SgEsc_t *gApEscHash[ESC_HASH_SIZE];
193 
194 
195 // Hash a escape sequence name. The value returned is an integer
196 // between 0 and ESC_HASH_SIZE-1, inclusive.
197 
198 static int EscHash(const char *zName) {
199  int h = 0; // The hash value to be returned
200  char c; // The next character in the name being hashed
201 
202  while ((c = *zName) != 0) {
203  h = h<<5 ^ h ^ c;
204  zName++;
205  }
206  if (h < 0) h = -h;
207 
208  return h % ESC_HASH_SIZE;
209 }
210 
211 #ifdef TEST
212 // Compute the longest and average collision chain length for the
213 // escape sequence hash table
214 
215 static void EscHashStats()
216 {
217  int i;
218  int sum = 0;
219  int max = 0;
220  int cnt;
221  int notempty = 0;
222  struct SgEsc_t *p;
223 
224  for (i = 0; i < sizeof(gEscSequences) / sizeof(gEscSequences[0]); i++) {
225  cnt = 0;
226  p = gApEscHash[i];
227  if (p) notempty++;
228  while (p) {
229  ++cnt;
230  p = p->fPNext;
231  }
232  sum += cnt;
233  if (cnt > max) max = cnt;
234  }
235  printf("Longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
236  max, (double)sum/(double)notempty, i, i-notempty,
237  100.0*(i-notempty)/(double)i);
238 }
239 #endif
240 
241 // Initialize the escape sequence hash table
242 
243 static void EscInit() {
244  int i; /* For looping thru the list of escape sequences */
245  int h; /* The hash on a sequence */
246 
247  for (i = 0; i < int(sizeof(gEscSequences) / sizeof(gEscSequences[i])); i++) {
248 /* #ifdef XCLASS_UTF_MAX */
249 #if 0
250  int c = gEscSequences[i].value[0];
251  xclass::UniCharToUtf(c, gEscSequences[i].value);
252  }
253 #endif
254  h = EscHash(gEscSequences[i].fZName);
255  gEscSequences[i].fPNext = gApEscHash[h];
256  gApEscHash[h] = &gEscSequences[i];
257  }
258 #ifdef TEST
259  EscHashStats();
260 #endif
261 }
262 
263 
// Plain-ASCII stand-ins for the non-standard microsoft characters between
// 0x80 and 0x9f, so that they are visible on Unix systems.  The entry for
// character code v is gAcMsChar[v & 0x1f].  Every replacement is below
// 0x80, to avoid UTF-8 problems.

static char gAcMsChar[] = {
   /* 0x80 - 0x87 */ 'C',  ' ',  ',',  'f',  '"',  '.',  '*',  '*',
   /* 0x88 - 0x8f */ '^',  '%',  'S',  '<',  'O',  ' ',  'Z',  ' ',
   /* 0x90 - 0x97 */ ' ',  '\'', '\'', '"',  '"',  '*',  '-',  '-',
   /* 0x98 - 0x9f */ '~',  '@',  's',  '>',  'o',  ' ',  'z',  'Y',
};
303 
304 
305 ////////////////////////////////////////////////////////////////////////////////
306 /// Translate escape sequences in the string "z". "z" is overwritten
307 /// with the translated sequence.
308 ///
309 /// Unrecognized escape sequences are unaltered.
310 ///
311 /// Example:
312 ///
313 /// input = "AT&amp;T &gt MCI"
314 /// output = "AT&T > MCI"
315 
316 void HtmlTranslateEscapes(char *z)
317 {
318  int from; // Read characters from this position in z[]
319  int to; // Write characters into this position in z[]
320  int h; // A hash on the escape sequence
321  struct SgEsc_t *p; // For looping down the escape sequence collision chain
322  static int isInit = 0; // True after initialization
323 
324  from = to = 0;
325  if (!isInit) {
326  EscInit();
327  isInit = 1;
328  }
329  while (z[from]) {
330  if (z[from] == '&') {
331  if (z[from+1] == '#') {
332  int i = from + 2;
333  int v = 0;
334  while (isdigit(z[i])) {
335  v = v*10 + z[i] - '0';
336  i++;
337  }
338  if (z[i] == ';') { i++; }
339 
340  // Translate the non-standard microsoft characters in the range of
341  // 0x80 to 0x9f into something we can see.
342 
343  if (v >= 0x80 && v < 0xa0) {
344  v = gAcMsChar[v & 0x1f];
345  }
346 
347  // Put the character in the output stream in place of the "&#000;".
348  // How we do this depends on whether or not we are using UTF-8.
349 
350  z[to++] = v;
351  from = i;
352  } else {
353  int i = from+1;
354  int c;
355  while (z[i] && isalnum(z[i])) ++i;
356  c = z[i];
357  z[i] = 0;
358  h = EscHash(&z[from+1]);
359  p = gApEscHash[h];
360  while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
361  z[i] = c;
362  if (p) {
363  int j;
364  for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
365  from = i;
366  if (c == ';') from++;
367  } else {
368  z[to++] = z[from++];
369  }
370  }
371 
372  // Look for the non-standard microsoft characters between 0x80 and 0x9f
373  // and translate them into printable ASCII codes. Separate algorithms
374  // are required to do this for plain ascii and for utf-8.
375 
376  } else if (((unsigned char) z[from]) >= 0x80 &&
377  ((unsigned char) z[from]) < 0xa0) {
378  z[to++] = gAcMsChar[z[from++] & 0x1f];
379  } else {
380  z[to++] = z[from++];
381  }
382  }
383  z[to] = 0;
384 }
385 
386 /******************* End Escape Sequence Translator ***************/
387 
388 /******************* Begin HTML tokenizer code *******************/
389 
390 // Set to non-zero once HtmlHashInit() has built the markup hash table
391 // (gApMap[], filled from the entries of HtmlMarkupMap[]).
392 
393 static int gIsInit = 0;
394 
395 // The hash table for HTML markup names.
396 //
397 // If an HTML markup name hashes to H, then gApMap[H] will point to
398 // a linked list of sgMap structure, one of which will describe the
399 // the particular markup (if it exists.)
400 
402 
403 // Hash a markup name
404 //
405 // HTML markup is case insensitive, so this function will give the
406 // same hash regardless of the case of the markup name.
407 //
408 // The value returned is an integer between 0 and HTML_MARKUP_HASH_SIZE-1,
409 // inclusive.
410 
411 static int HtmlHash(const char *zName) {
412  int h = 0;
413  char c;
414 
415  while ((c = *zName) != 0) {
416  if (isupper(c)) { // do we have to check for this??????
417  c = tolower(c);
418  }
419  h = h<<5 ^ h ^ c;
420  zName++;
421  }
422  if (h < 0) {
423  h = -h;
424  }
425 
426  return h % HTML_MARKUP_HASH_SIZE;
427 }
428 
429 
430 #ifdef TEST
431 // Compute the longest and average collision chain length for the
432 // markup hash table
433 
434 static void HtmlHashStats() {
435  int i;
436  int sum = 0;
437  int max = 0;
438  int cnt;
439  int notempty = 0;
440  struct sgMap *p;
441 
442  for (i = 0; i < HTML_MARKUP_COUNT; i++) {
443  cnt = 0;
444  p = gApMap[i];
445  if (p) notempty++;
446  while (p) {
447  cnt++;
448  p = p->fPCollide;
449  }
450  sum += cnt;
451  if (cnt > max) max = cnt;
452  }
453 
454  printf("longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
455  max, (double)sum/(double)notempty, i, i-notempty,
456  100.0*(i-notempty)/(double)i);
457 }
458 #endif
459 
460 
461 // Initialize the escape sequence hash table
462 
463 static void HtmlHashInit(void){
464  int i;
465  int h; // The hash on a markup name
466 
467  for (i = 0; i < HTML_MARKUP_COUNT; i++) {
468  h = HtmlHash(HtmlMarkupMap[i].fZName);
470  gApMap[h] = &HtmlMarkupMap[i];
471  }
472 #ifdef TEST
473  HtmlHashStats();
474 #endif
475 }
476 
477 ////////////////////////////////////////////////////////////////////////////////
478 /// Append the given TGHtmlElement to the tokenizers list of elements
479 
481 {
482  pElem->fPNext = 0;
483  pElem->fPPrev = fPLast;
484  if (fPFirst == 0) {
485  fPFirst = pElem;
486  } else {
487  fPLast->fPNext = pElem;
488  }
489  fPLast = pElem;
490  fNToken++;
491 }
492 
493 ////////////////////////////////////////////////////////////////////////////////
494 /// Insert token pNew before token p
495 
497 {
498  if (offs < 0) {
499  if (p) {
500  offs = p->fOffs;
501  } else {
502  offs = fNText;
503  }
504  }
505 
506 ////if (p) { pNew->fStyle = p->fStyle; pNew->fFlags = p->fFlags; }
507 
508 // pNew->fCount = 0;
509  pNew->fOffs = offs;
510  pNew->fPNext = p;
511  if (p) {
512  pNew->fElId = p->fElId;
513  p->fElId = ++fIdind;
514  pNew->fPPrev = p->fPPrev;
515  if (p->fPPrev) p->fPPrev->fPNext = pNew;
516  if (fPFirst == p) fPFirst = pNew;
517  p->fPPrev = pNew;
518  } else {
519  pNew->fElId = ++fIdind;
520  AppendElement(pNew);
521  }
522  fNToken++;
523 }
524 
525 ////////////////////////////////////////////////////////////////////////////////
526 /// Compute the new column index following the given character.
527 
528 static int NextColumn(int iCol, char c)
529 {
530  switch (c) {
531  case '\n': return 0;
532  case '\t': return (iCol | 7) + 1;
533  default: return iCol+1;
534  }
535  /* NOT REACHED */
536 }
537 
538 ////////////////////////////////////////////////////////////////////////////////
539 /// Convert a string to all lower-case letters.
540 
541 void ToLower(char *z)
542 {
543  while (*z) {
544  if (isupper(*z)) *z = tolower(*z);
545  z++;
546  }
547 }
548 
549 ////////////////////////////////////////////////////////////////////////////////
550 /// Process as much of the input HTML as possible. Construct new
551 /// TGHtmlElement objects and appended them to the list. Return
552 /// the number of characters actually processed.
553 ///
554 /// This routine may invoke a callback procedure which could delete
555 /// the HTML widget.
556 ///
557 /// This routine is not reentrant for the same HTML widget. To
558 /// prevent reentrancy (during a callback), the p->fICol field is
559 /// set to a negative number. This is a flag to future invocations
560 /// not to reentry this routine. The p->fICol field is restored
561 /// before exiting, of course.
562 
564 {
565  char *z; // The input HTML text
566  int c; // The next character of input
567  int n; // Number of characters processed so far
568  int inpCol; // Column of input
569  int i, j; // Loop counters
570  int h; // Result from HtmlHash()
571  TGHtmlElement *pElem;// A new HTML element
572  int selfClose; // True for content free elements. Ex: <br/>
573  int argc; // The number of arguments on a markup
574  SHtmlTokenMap_t *pMap; // For searching the markup name hash table
575 # define mxARG 200 // Maximum number of parameters in a single markup
576  char *argv[mxARG]; // Pointers to each markup argument.
577  int arglen[mxARG]; // Length of each markup argument
578  //int rl, ol;
579 #ifdef pIsInMeachnism
580  int pIsInScript = 0;
581  int pIsInNoScript = 0;
582  int pIsInNoFrames = 0;
583 #endif
584  int sawdot = 0;
585  int inLi = 0;
586 
587  static char null[1] = { "" };
588 
589  inpCol = fICol;
590  n = fNComplete;
591  z = fZText;
592  if (inpCol < 0) return n; // Prevents recursion
593  fICol = -1;
594  pElem = 0;
595 
596  while ((c = z[n]) != 0) {
597 
598  sawdot--;
599  if (c == -64 && z[n+1] == -128) {
600  n += 2;
601  continue;
602  }
603 
604  if (fPScript) {
605 
606  // We are in the middle of <SCRIPT>...</SCRIPT>. Just look for
607  // the </SCRIPT> markup. (later:) Treat <STYLE>...</STYLE> the
608  // same way.
609 
610  TGHtmlScript *pScr = fPScript;
611  const char *zEnd;
612  int nEnd;
613  //int curline, curch, curlast = n;
614  int sqcnt;
615  if (pScr->fType == Html_SCRIPT) {
616  zEnd = "</script>";
617  nEnd = 9;
618  } else if (pScr->fType == Html_NOSCRIPT) {
619  zEnd = "</noscript>";
620  nEnd = 11;
621  } else if (pScr->fType == Html_NOFRAMES) {
622  zEnd = "</noframes>";
623  nEnd = 11;
624  } else {
625  zEnd = "</style>";
626  nEnd = 8;
627  }
628  if (pScr->fNStart < 0) {
629  pScr->fNStart = n;
630  pScr->fNScript = 0;
631  }
632  sqcnt = 0;
633  for (i = n /*pScr->fNStart + pScr->fNScript*/; z[i]; i++) {
634  if (z[i] == '\'' || z[i] == '"') {
635  sqcnt++; // Skip if odd # quotes
636  } else if (z[i] == '\n') {
637  sqcnt = 0;
638  }
639  if (z[i] == '<' && z[i+1] == '/' &&
640  strncasecmp(&z[i], zEnd, nEnd) == 0) {
641  if (zEnd[3] == 'c' && ((sqcnt % 2) == 1)) continue;
642  pScr->fNScript = i - n;
643  fPScript = 0;
644  n = i + nEnd;
645  break;
646  }
647  }
648  if (z[i] == 0) goto incomplete;
649  if (fPScript) {
650  pScr->fNScript = i - n;
651  n = i;
652  }
653  else {
654 #ifdef pIsInMeachnism
655  // If there is a script, execute it now and insert any output
656  // to the html stream for parsing as html. (ie. client side scripting)
657 
658  if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
659 
660  //for (curch = 0, curline = 1; curch <= curlast; curch++)
661  // if (z[curch] == '\n') curline++;
662 
663  // arglist in pElem and text pointers in pScr?
664  // Inline scripts can contain unmatched brackets :-)
665  //char varind[50];
666  //sprintf(varind, "HtmlScrVar%d", p->varind++);
667  //char savech = fZText[pScr->fNStart + pScr->fNScript];
668  //fZText[pScr->fNStart + pScr->fNScript] = 0;
669  //char *scriptBody = StrDup(fZText[pScr->fNStart]);
670  //fZText[pScr->fNStart + pScr->fNScript] = savech;
671  AdvanceLayout(p);
672  inParse++;
673  char *result = ProcessScript((TGHtmlScript *) pElem); // pElem or pScr??
674  inParse--;
675  if (result) {
676  ol = fNAlloc;
677  rl = strlen(result);
678  fNAlloc += rl;
679  z = fZText = HtmlRealloc(z, ol+rl);
680  memmove(z + n + rl, z+n, ol - n);
681  memmove(z + n, result, rl);
682  }
683  }
684  pIsInScript = 0;
685  pIsInNoScript = 0;
686  pIsInNoFrames = 0;
687 #endif
688  }
689  //continue;
690 
691  }
692  else if (isspace((unsigned char)c)) {
693 
694  // White space
695  for (i = 0;
696  (c = z[n+i]) != 0 && isspace((unsigned char)c) && c != '\n' && c != '\r';
697  i++) { }
698  if (c == '\r' && z[n+i+1] == '\n') ++i;
699 #if 0 // this is certainly NOT OK, since it alters pre-formatted text
700  if (sawdot == 1) {
701  pElem = new TGHtmlTextElement(2);
702  strcpy(((TGHtmlTextElement *)pElem)->fZText, " ");
703  pElem->fElId = ++fIdind;
704  pElem->fOffs = n;
705  pElem->fCount = 1;
706  AppendElement(pElem);
707  }
708 #endif
709  pElem = new TGHtmlSpaceElement;
710  if (pElem == 0) goto incomplete;
711  ((TGHtmlSpaceElement *)pElem)->fW = 0;
712  pElem->fOffs = n;
713  pElem->fElId = ++fIdind;
714  if (c == '\n' || c == '\r') {
715  pElem->fFlags = HTML_NewLine;
716  pElem->fCount = 1;
717  i++;
718  inpCol = 0;
719  } else {
720  int iColStart = inpCol;
721  pElem->fFlags = 0;
722  for (j = 0; j < i; j++) {
723  inpCol = NextColumn(inpCol, z[n+j]);
724  }
725  pElem->fCount = inpCol - iColStart;
726  }
727  AppendElement(pElem);
728  n += i;
729 
730  }
731  else if (c != '<' || fIPlaintext != 0 ||
732  (!isalpha(z[n+1]) && z[n+1] != '/' && z[n+1] != '!' && z[n+1] != '?')) {
733 
734  // Ordinary text
735  for (i = 1; (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '<'; i++) {}
736  if (z[n+i-1] == '.' || z[n+i-1] == '!' || z[n+i-1] == '?') sawdot = 2;
737  if (c == 0) goto incomplete;
738  if (fIPlaintext != 0 && z[n] == '<') {
739  switch (fIPlaintext) {
740  case Html_LISTING:
741  if (i >= 10 && strncasecmp(&z[n], "</listing>", 10) == 0) {
742  fIPlaintext = 0;
743  goto doMarkup;
744  }
745  break;
746 
747  case Html_XMP:
748  if (i >= 6 && strncasecmp(&z[n], "</xmp>", 6) == 0) {
749  fIPlaintext = 0;
750  goto doMarkup;
751  }
752  break;
753 
754  case Html_TEXTAREA:
755  if (i >= 11 && strncasecmp(&z[n], "</textarea>", 11) == 0) {
756  fIPlaintext = 0;
757  goto doMarkup;
758  }
759  break;
760 
761  default:
762  break;
763  }
764  }
765  pElem = new TGHtmlTextElement(i);
766  if (pElem == 0) goto incomplete;
767  TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
768  tpElem->fElId = ++fIdind;
769  tpElem->fOffs = n;
770  strncpy(tpElem->fZText, &z[n], i);
771  tpElem->fZText[i] = 0;
772  AppendElement(pElem);
773  if (fIPlaintext == 0 || fIPlaintext == Html_TEXTAREA) {
774  HtmlTranslateEscapes(tpElem->fZText);
775  }
776  pElem->fCount = (Html_16_t) strlen(tpElem->fZText);
777  n += i;
778  inpCol += i;
779 
780  } else if (strncmp(&z[n], "<!--", 4) == 0) {
781 
782  // An HTML comment. Just skip it.
783  for (i = 4; z[n+i]; i++) {
784  if (z[n+i] == '-' && strncmp(&z[n+i], "-->", 3) == 0) break;
785  }
786  if (z[n+i] == 0) goto incomplete;
787 
788  pElem = new TGHtmlTextElement(i);
789  if (pElem == 0) goto incomplete;
790  TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
791  tpElem->fType = Html_COMMENT;
792  tpElem->fElId = ++fIdind;
793  tpElem->fOffs = n;
794  strncpy(tpElem->fZText, &z[n+4], i-4);
795  tpElem->fZText[i-4] = 0;
796  tpElem->fCount = 0;
797  AppendElement(pElem);
798 
799  pElem = new TGHtmlElement(Html_EndCOMMENT);
800  AppToken(pElem, 0, n+4);
801 
802  for (j = 0; j < i+3; j++) {
803  inpCol = NextColumn(inpCol, z[n+j]);
804  }
805  n += i + 3;
806 
807  }
808  else {
809 
810  // Markup.
811  //
812  // First get the name of the markup
813 doMarkup:
814  argc = 1;
815  argv[0] = &z[n+1];
816  for (i = 1;
817  (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '>' && (i < 2 || c != '/');
818  i++) {}
819  arglen[0] = i - 1;
820  if (c == 0) goto incomplete;
821 
822  // Now parse up the arguments
823 
824  while (isspace((unsigned char)z[n+i])) ++i;
825  while ((c = z[n+i]) != 0 && c != '>' && (c != '/' || z[n+i+1] != '>')) {
826  if (argc > mxARG - 3) argc = mxARG - 3;
827  argv[argc] = &z[n+i];
828  j = 0;
829  while ((c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>' &&
830  c != '=' && (c != '/' || z[n+i+j+1] != '>')) ++j;
831  arglen[argc] = j;
832  if (c == 0) goto incomplete;
833  i += j;
834  while (isspace((unsigned char)c)) {
835  i++;
836  c = z[n+i];
837  }
838  if (c == 0) goto incomplete;
839  argc++;
840  if (c != '=') {
841  argv[argc] = null;
842  arglen[argc] = 0;
843  argc++;
844  continue;
845  }
846  i++;
847  c = z[n+i];
848  while (isspace((unsigned char)c)) {
849  i++;
850  c = z[n+i];
851  }
852  if (c == 0) goto incomplete;
853  if (c == '\'' || c == '"') {
854  int cQuote = c;
855  i++;
856  argv[argc] = &z[n+i];
857  for (j = 0; (c = z[n+i+j]) != 0 && c != cQuote; j++) {}
858  if (c == 0) goto incomplete;
859  arglen[argc] = j;
860  i += j+1;
861  } else {
862  argv[argc] = &z[n+i];
863  for (j = 0; (c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>'; j++) {}
864  if (c == 0) goto incomplete;
865  arglen[argc] = j;
866  i += j;
867  }
868  argc++;
869  while (isspace(z[n+i])) ++i;
870  }
871  if (c == '/') {
872  i++;
873  c = z[n+i];
874  selfClose = 1;
875  } else {
876  selfClose = 0;
877  }
878  if (c == 0) goto incomplete;
879  for (j = 0; j < i+1; j++) {
880  inpCol = NextColumn(inpCol, z[n+j]);
881  }
882  n += i + 1;
883 
884  // Lookup the markup name in the hash table
885 
886  if (!gIsInit) {
887  HtmlHashInit();
888  gIsInit = 1;
889  }
890  c = argv[0][arglen[0]];
891  argv[0][arglen[0]] = 0;
892  h = HtmlHash(argv[0]);
893  for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
894  if (strcasecmp(pMap->fZName, argv[0]) == 0) break;
895  }
896  argv[0][arglen[0]] = c;
897  if (pMap == 0) continue; // Ignore unknown markup
898 
899 makeMarkupEntry:
900  // Construct a TGHtmlMarkupElement object for this markup.
901 
902  pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc, arglen, argv);
903  if (pElem == 0) goto incomplete;
904 
905  pElem->fElId = ++fIdind;
906  pElem->fOffs = n;
907 
908  AddFormInfo(pElem);
909 
910  // The new markup has now been constructed in pElem. But before
911  // appending it to the list, check to see if there is a special
912  // handler for this markup type.
913 
914  if (ProcessToken(pElem, pMap->fZName, pMap->fType)) {
915  // delete pElem;
916 
917  // Tricky, tricky. The user function might have caused the p->fZText
918  // pointer to change, so renew our copy of that pointer.
919 
920  z = fZText;
921  if (z == 0) {
922  n = 0;
923  inpCol = 0;
924  goto incomplete;
925  }
926  continue;
927  }
928 
929  // No special handler for this markup. Just append it to the
930  // list of all tokens.
931 
932  AppendElement(pElem);
933  switch (pMap->fType) {
934  case Html_TABLE:
935  break;
936 
937  case Html_PLAINTEXT:
938  case Html_LISTING:
939  case Html_XMP:
940  case Html_TEXTAREA:
941  fIPlaintext = pMap->fType;
942  break;
943 
944  case Html_NOFRAMES:
945  if (!fHasFrames) break;
946 #ifdef pIsInMeachnism
947  pIsInNoFrames = 1;
948 #endif
949  case Html_NOSCRIPT:
950  break;
951  // coverity[unreachable]
952  if (!fHasScript) break;
953 #ifdef pIsInMeachnism
954  pIsInNoScript = 1;
955 #endif
956  case Html_SCRIPT:
957 #ifdef pIsInMeachnism
958  pIsInScript = 1;
959 #endif
960  // fallthrough
961  case Html_STYLE:
962  fPScript = (TGHtmlScript *) pElem;
963  break;
964 
965  case Html_LI:
966  if (!fAddEndTags) break;
967  if (inLi) {
969  AppToken(e, pElem, n);
970  } else {
971  inLi = 1;
972  }
973  break;
974 
975  case Html_EndLI:
976  inLi=0;
977  break;
978 
979  case Html_EndOL:
980  case Html_EndUL:
981  if (!fAddEndTags) break;
982  if (inLi) {
984  AppToken(e, pElem, n);
985  } else {
986  inLi = 0;
987  }
988  break;
989 
990  default:
991  break;
992  }
993 
994  // If this is self-closing markup (ex: <br/> or <img/>) then
995  // synthesize a closing token.
996 
997  if (selfClose && argv[0][0] != '/' &&
998  strcmp(&pMap[1].fZName[1], pMap->fZName) == 0) {
999  selfClose = 0;
1000  pMap++;
1001  argc = 1;
1002  goto makeMarkupEntry;
1003  }
1004  }
1005  }
1006 
1007 incomplete:
1008  fICol = inpCol;
1009  ////fPScript = 0;
1010 
1011  return n;
1012 }
1013 
1014 /************************** End HTML Tokenizer Code ***************************/
1015 
1016 ////////////////////////////////////////////////////////////////////////////////
1017 /// Make one markup entry.
1018 
1020  int arglen[], char *argv[])
1021 {
1023 
1024  switch (objType) {
1025  case O_HtmlCell:
1026  e = new TGHtmlCell(type, argc, arglen, argv);
1027  break;
1028 
1029  case O_HtmlTable:
1030  e = new TGHtmlTable(type, argc, arglen, argv);
1031  break;
1032 
1033  case O_HtmlRef:
1034  e = new TGHtmlRef(type, argc, arglen, argv);
1035  break;
1036 
1037  case O_HtmlLi:
1038  e = new TGHtmlLi(type, argc, arglen, argv);
1039  break;
1040 
1041  case O_HtmlListStart:
1042  e = new TGHtmlListStart(type, argc, arglen, argv);
1043  break;
1044 
1045  case O_HtmlImageMarkup:
1046  e = new TGHtmlImageMarkup(type, argc, arglen, argv);
1047  break;
1048 
1049  case O_HtmlInput:
1050  e = new TGHtmlInput(type, argc, arglen, argv);
1051  break;
1052 
1053  case O_HtmlForm:
1054  e = new TGHtmlForm(type, argc, arglen, argv);
1055  break;
1056 
1057  case O_HtmlHr:
1058  e = new TGHtmlHr(type, argc, arglen, argv);
1059  break;
1060 
1061  case O_HtmlAnchor:
1062  e = new TGHtmlAnchor(type, argc, arglen, argv);
1063  break;
1064 
1065  case O_HtmlScript:
1066  e = new TGHtmlScript(type, argc, arglen, argv);
1067  break;
1068 
1069  case O_HtmlMapArea:
1070  e = new TGHtmlMapArea(type, argc, arglen, argv);
1071  break;
1072 
1073  default:
1074  e = new TGHtmlMarkupElement(type, argc, arglen, argv);
1075  break;
1076  }
1077 
1078  return e;
1079 }
1080 
1081 ////////////////////////////////////////////////////////////////////////////////
1082 /// Append text to the tokenizer engine.
1083 
1085 {
1086  int len = strlen(text);
1087 
1088  if (fNText == 0) {
1089  fNAlloc = len + 100;
1090  fZText = new char [fNAlloc];
1091  } else if (fNText + len >= fNAlloc) {
1092  fNAlloc += len + 100;
1093  char *tmp = new char[fNAlloc];
1094  // coverity[secure_coding]
1095  strcpy(tmp, fZText);
1096  delete[] fZText;
1097  fZText = tmp;
1098  }
1099 
1100  if (fZText == 0) {
1101  fNText = 0;
1102  UNTESTED;
1103  return;
1104  }
1105 
1106  // coverity[secure_coding]
1107  strcpy(&fZText[fNText], text);
1108  fNText += len;
1109  fNComplete = Tokenize();
1110 }
1111 
1112 ////////////////////////////////////////////////////////////////////////////////
1113 /// This routine takes a text representation of a token, converts it into an
1114 /// TGHtmlElement object and inserts it immediately prior to pToken. If pToken
1115 /// is 0, then the newly created TGHtmlElement is appended.
1116 ///
1117 /// This routine does nothing to resize, restyle, relayout or redisplay
1118 /// the HTML. That is the calling routines responsibility.
1119 ///
1120 /// Return the new TGHtmlElement object if successful. Return zero if
1121 /// zType is not a known markup name.
1122 ///
1123 /// pToken - Insert before this. Append if pToken == 0
1124 /// zType - Type of markup. Ex: "/a" or "table"
1125 /// zArgs - List of arguments
1126 /// offs - Calculate offset, and insert changed text into fZText!
1127 
1129  char *zType, char *zArgs, int offs)
1130 {
1131  SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1132  int h; // The hash on zType
1133  TGHtmlElement *pElem; // The new element
1134  //int nByte; // How many bytes to allocate
1135  //int i; // Loop counter
1136 
1137  if (!gIsInit) {
1138  HtmlHashInit();
1139  gIsInit = 1;
1140  }
1141 
1142  if (strcmp(zType, "Text") == 0) {
1143  pElem = new TGHtmlTextElement(zArgs ? strlen(zArgs) : 0);
1144  if (pElem == 0) return 0;
1145  if (zArgs) {
1146  // coverity[secure_coding]
1147  strcpy (((TGHtmlTextElement *)pElem)->fZText, zArgs);
1148  pElem->fCount = (Html_16_t) strlen(zArgs);
1149  }
1150  } else if (!strcmp(zType, "Space")) {
1151  pElem = new TGHtmlSpaceElement();
1152  if (pElem == 0) return 0;
1153  } else {
1154  h = HtmlHash(zType);
1155  for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1156  if (strcasecmp(pMap->fZName, zType) == 0) break;
1157  }
1158  if (pMap == 0) return 0;
1159  if (zArgs == 0 || *zArgs == 0) {
1160  // Special case of no arguments. This is a lot easier...
1161  // well... now its the same thing!
1162  pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, 1, 0, 0);
1163  if (pElem == 0) return 0;
1164  } else {
1165  // The general case. There are arguments that need to be parsed
1166  // up. This is slower, but we gotta do it.
1167  //int argc;
1168  //char **argv;
1169  //char *zBuf;
1170 
1171 #if 0
1172  if (!SplitList(zArgs, &argc, &argv)) return 0;
1173 
1174  // shall we insert a dummy argv[0]?
1175 
1176  pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc/*+1??*/, 0, argv);
1177  if (pElem == 0) return 1;
1178 
1179  while (--argc >= 0) if (argv[argc]) delete[] argv[argc];
1180  delete[] argv;
1181 #else
1182  return 0;
1183 #endif
1184  }
1185  }
1186 
1187  pElem->fElId = ++fIdind;
1188 
1189  AppToken(pElem, pToken, offs);
1190 
1191  return pElem;
1192 }
1193 
1194 ////////////////////////////////////////////////////////////////////////////////
1195 /// Insert text into text token, or break token into two text tokens.
1196 /// Also, handle backspace char by deleting text.
1197 /// Should also handle newline char by splitting text.
1198 
1199 int TGHtml::TextInsertCmd(int /*argc*/, char ** /*argv*/)
1200 {
1201 #if 0
1202  TGHtmlElement *p, *pElem;
1203  int i, l, n = 0;
1204  int idx = 0;
1205  int ptyp = Html_Unknown;
1206  int istxt = 0;
1207  char *cp = 0, c, *cp2;
1208 
1209  if (GetIndex(argv[3], &p, &i) != 0) {
1210  // sprintf(tmp, "malformed index: \"%s\"", argv[3]);
1211  return 0;
1212  }
1213  if (p) {
1214  ptyp = p->fType;
1215  if ((istxt = (ptyp == Html_Text))) {
1216  l = p->fCount;
1217  cp = ((TGHtmlTextElement *)p)->fZText;
1218  }
1219  }
1220  if (argv[2][0] == 'b') { // Break text token into two.
1221  if (!istxt) return 1;
1222  if (i == 0 || i == l) return 1;
1223  pElem = InsertToken(p->fPNext, "Text", cp + i, -1);
1224  cp[i] = 0;
1225  p->fCount = i;
1226  return 1;
1227  }
1228  c = argv[4][0];
1229  if (!c) return 1;
1230  if (c == '\b') {
1231  if ((!istxt) || (!l) || (!i)) {
1232  if (!p) return 1;
1233  if (p->fType == Html_BR)
1234  RemoveElements(p, p);
1235  return 1;
1236  }
1237  if (p && l == 1) {
1238  RemoveElements(p, p);
1239  return 1;
1240  }
1241  if (i == l)
1242  cp[p->fCount] = 0;
1243  else
1244  memcpy(cp+i-1, cp+i, l-i+1);
1245 
1246  cp[--p->fCount] = 0;
1247  if (ins.i-- <= 0) ins.i = 0;
1248  ins.p = p;
1249  return 1;
1250  }
1251  if (c == '\n' || c == '\r') {
1252  }
1253  if (istxt) {
1254  char *cp;
1255  int t, j, alen = strlen(argv[4]);
1256  n = alen + l;
1257 
1259 
1260  if (text->fZText == (char*) ((&text->fZText)+1)) {
1261  cp = new char[n+1];
1262  strcpy(cp, text->fZText);
1263  } else {
1264  cp = new char[n+1];
1265  strcpy(cp, text->fZText);
1266  }
1267  cp2 = new char[alen+1];
1268  memcpy(cp2, argv[4], alen+1);
1269  HtmlTranslateEscapes(cp2);
1270  alen = strlen(cp2);
1271  memmove(cp+alen+i, cp+i, l-i+1);
1272  for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
1273  delete[] cp2;
1274  delete[] text->fZText;
1275  text->fZText = cp;
1276  p->fCount = strlen(cp);
1277  ins.p = p;
1278  ins.i = i+alen;
1279  } else {
1280  p = InsertToken(p ? p->fPNext : 0, "Text", argv[4], -1);
1281  AddStyle(p);
1282  i = 0;
1283  ins.p = p;
1284  ins.i = 1;
1285  }
1286  if (p) {
1287  idx = p->base.id;
1288  AddStrOffset(p, argv[4], i);
1289  }
1290 #endif
1291  return 1;
1292 }
1293 
1294 ////////////////////////////////////////////////////////////////////////////////
1295 /// Returns token map matching zType name.
1296 
1298 {
1299  SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1300  int h; // The hash on zType
1301 
1302  if (!gIsInit) {
1303  HtmlHashInit();
1304  gIsInit = 1;
1305  }
1306  h = HtmlHash(zType);
1307  for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1308  if (strcasecmp(pMap->fZName, zType) == 0) break;
1309  }
1310 
1311  return pMap;
1312 }
1313 
1314 ////////////////////////////////////////////////////////////////////////////////
1315 /// Convert a markup name into a type integer
1316 
1317 int TGHtml::NameToType(char *zType)
1318 {
1319  SHtmlTokenMap_t *pMap = NameToPmap(zType);
1320  return pMap ? pMap->fType : (int)Html_Unknown;
1321 }
1322 
1323 ////////////////////////////////////////////////////////////////////////////////
1324 /// Convert a type into a symbolic name
1325 
1326 const char *TGHtml::TypeToName(int type)
1327 {
1328  if (type >= Html_A && type <= Html_EndXMP) {
1329  SHtmlTokenMap_t *pMap = gApMap[type - Html_A];
1330  return pMap->fZName;
1331  } else {
1332  return "???";
1333  }
1334 }
1335 
1336 ////////////////////////////////////////////////////////////////////////////////
1337 /// For debugging purposes, print information about a token
1338 
1340 {
1341 //#ifdef DEBUG
1342  static char zBuf[200];
1343  int j;
1344  const char *zName;
1345 
1346  if (p == 0) {
1347  snprintf(zBuf, 200, "NULL");
1348  return zBuf;
1349  }
1350  switch (p->fType) {
1351  case Html_Text:
1352  snprintf(zBuf, 200, "text: \"%.*s\"", p->fCount, ((TGHtmlTextElement *)p)->fZText);
1353  break;
1354 
1355  case Html_Space:
1356  if (p->fFlags & HTML_NewLine) {
1357  snprintf(zBuf, 200, "space: \"\\n\"");
1358  } else {
1359  snprintf(zBuf, 200, "space: \" \"");
1360  }
1361  break;
1362 
1363  case Html_Block: {
1364  TGHtmlBlock *block = (TGHtmlBlock *) p;
1365  if (block->fN > 0) {
1366  int n = block->fN;
1367  if (n > 150) n = 150;
1368  snprintf(zBuf, 200, "<Block z=\"%.*s\">", n, block->fZ);
1369  } else {
1370  snprintf(zBuf, 200, "<Block>");
1371  }
1372  break;
1373  }
1374 
1375  default:
1376  if (p->fType >= HtmlMarkupMap[0].fType
1378  zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1379  } else {
1380  zName = "Unknown";
1381  }
1382  snprintf(zBuf, 200, "markup (%d) <%s", p->fType, zName);
1383  for (j = 1 ; j < p->fCount; j += 2) {
1384  snprintf(&zBuf[strlen(zBuf)], 200-strlen(zBuf), " %s=\"%s\"",
1385  ((TGHtmlMarkupElement *)p)->fArgv[j-1],
1386  ((TGHtmlMarkupElement *)p)->fArgv[j]);
1387  }
1388  // coverity[secure_coding]
1389  strcat(zBuf, ">");
1390  break;
1391  }
1392  return zBuf;
1393 //#else
1394 // return 0;
1395 //#endif
1396 }
1397 
1398 ////////////////////////////////////////////////////////////////////////////////
1399 /// Append all the arguments of the given markup to the given TGString.
1400 ///
1401 /// Example: If the markup is <IMG SRC=image.gif ALT="hello!">
1402 /// then the following text is appended to the TGString:
1403 ///
1404 /// "src image.gif alt hello!"
1405 ///
1406 /// Notice how all attribute names are converted to lower case.
1407 /// This conversion happens in the parser.
1408 
1410 {
1411  int i;
1412 
1413  for (i = 0; i + 1 < pElem->fCount; i += 2) {
1414  str->Append(pElem->fArgv[i]);
1415  str->Append("=");
1416  str->Append(pElem->fArgv[i+1]);
1417  str->Append(" ");
1418  }
1419 }
1420 
1421 ////////////////////////////////////////////////////////////////////////////////
1422 /// Returns token name of html element p.
1423 
1425 {
1426  static char zBuf[200];
1427  //int j;
1428  const char *zName;
1429 
1430  zBuf[0] = 0;
1431  if (p == 0) {
1432  // coverity[secure_coding]: zBuf is large enough
1433  strcpy(zBuf, "NULL");
1434  return zBuf;
1435  }
1436  switch (p->fType) {
1437  case Html_Text:
1438  case Html_Space:
1439  break;
1440 
1441  case Html_Block:
1442  break;
1443 
1444  default:
1445  if (p->fType >= HtmlMarkupMap[0].fType &&
1447  zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1448  } else {
1449  zName = "Unknown";
1450  }
1451  strlcpy(zBuf, zName, sizeof(zBuf));
1452  break;
1453  }
1454 
1455  return zBuf;
1456 }
1457 
1458 ////////////////////////////////////////////////////////////////////////////////
1459 /// Returns token map at location n.
1460 
1462 {
1463  return HtmlMarkupMap+n;
1464 }
1465 
1466 ////////////////////////////////////////////////////////////////////////////////
1467 /// Return all tokens between the two elements as a string list.
1468 
1470 {
1471  TGString *str;
1472  int i;
1473  const char *zName;
1474  char zLine[100];
1475 
1476  str = new TGString("");
1477  while (p && p != pEnd) {
1478  switch (p->fType) {
1479  case Html_Block:
1480  break;
1481 
1482  case Html_Text:
1483  str->Append("{ Text \"");
1484  str->Append(((TGHtmlTextElement *)p)->fZText);
1485  str->Append("\" } ");
1486  break;
1487 
1488  case Html_Space:
1489  snprintf(zLine, 100, "Space %d %d ",
1490  p->fCount, (p->fFlags & HTML_NewLine) != 0);
1491  str->Append(zLine);
1492  break;
1493 
1494  case Html_Unknown:
1495  str->Append("Unknown ");
1496  break;
1497 
1498  default:
1499  str->Append("{ Markup ");
1500  if (p->fType >= HtmlMarkupMap[0].fType &&
1502  zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1503  } else {
1504  zName = "Unknown";
1505  }
1506  str->Append(zName);
1507  str->Append(" ");
1508  for (i = 0; i < p->fCount; ++i) {
1509  str->Append(((TGHtmlMarkupElement *)p)->fArgv[i]);
1510  str->Append(" ");
1511  }
1512  str->Append("} ");
1513  break;
1514  }
1515  p = p->fPNext;
1516  }
1517 
1518  return str;
1519 }
1520 
1521 ////////////////////////////////////////////////////////////////////////////////
1522 /// Print a list of tokens
1523 
1525 {
1526  TGHtmlElement *p;
1527 
1528  for (p = first; p != last; p = p->fPNext) {
1529  if (p->fType == Html_Block) {
1530  TGHtmlBlock *block = (TGHtmlBlock *) p;
1531  const char *z = block->fZ;
1532  int n = block->fN;
1533  if (n == 0 || z == 0) {
1534  n = 1;
1535  z = "";
1536  }
1537  printf("Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
1538  p->fFlags, p->fCount, block->fLeft, block->fRight,
1539  block->fTop, block->fBottom, n, z);
1540  } else {
1541  printf("Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
1542  p->fStyle.fFont, p->fStyle.fColor,
1543  p->fStyle.fAlign, p->fStyle.fFlags, DumpToken(p));
1544  }
1545  }
1546 }
c
#define c(i)
Definition: RSha256.hxx:119
l
auto * l
Definition: textangle.C:4
TGHtml::MakeMarkupEntry
TGHtmlMarkupElement * MakeMarkupEntry(int objType, int type, int argc, int arglen[], char *argv[])
Make one markup entry.
Definition: TGHtmlParse.cxx:1019
SHtmlTokenMap_t::fPCollide
SHtmlTokenMap_t * fPCollide
Definition: TGHtml.h:843
TGHtml::TokenizerAppend
void TokenizerAppend(const char *text)
Append text to the tokenizer engine.
Definition: TGHtmlParse.cxx:1084
Html_EndXMP
@ Html_EndXMP
Definition: TGHtmlTokens.h:220
TGHtmlLi
Definition: TGHtml.h:418
n
const Int_t n
Definition: legend1.C:16
TGHtml::fHasScript
int fHasScript
Definition: TGHtml.h:1243
Html_16_t
short Html_16_t
Definition: TGHtml.h:136
first
Definition: first.py:1
TGHtml::fPScript
TGHtmlScript * fPScript
Definition: TGHtml.h:1179
TGHtmlElement::fPNext
TGHtmlElement * fPNext
Definition: TGHtml.h:261
e
#define e(i)
Definition: RSha256.hxx:121
TGHtml::ListTokens
TGString * ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
Return all tokens between the two elements as a string list.
Definition: TGHtmlParse.cxx:1469
Html_SCRIPT
@ Html_SCRIPT
Definition: TGHtmlTokens.h:184
snprintf
#define snprintf
Definition: civetweb.c:1540
TGHtmlElement
Definition: TGHtml.h:249
Html_BR
@ Html_BR
Definition: TGHtmlTokens.h:91
SHtmlStyle_t::fColor
unsigned int fColor
Definition: TGHtml.h:145
HtmlTranslateEscapes
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
Definition: TGHtmlParse.cxx:316
TGHtml::ProcessScript
virtual char * ProcessScript(TGHtmlScript *)
Definition: TGHtml.h:957
SHtmlTokenMap_t::fObjType
Html_16_t fObjType
Definition: TGHtml.h:842
O_HtmlImageMarkup
#define O_HtmlImageMarkup
Definition: TGHtml.h:856
TGHtml::GetIndex
int GetIndex(const char *zIndex, TGHtmlElement **ppToken, int *pIndex)
This routine decodes a complete index specification.
Definition: TGHtmlIndex.cxx:538
gIsInit
static int gIsInit
Definition: TGHtmlParse.cxx:393
TGHtml::NameToPmap
SHtmlTokenMap_t * NameToPmap(char *zType)
Returns token map matching zType name.
Definition: TGHtmlParse.cxx:1297
TGHtml::DumpToken
char * DumpToken(TGHtmlElement *p)
For debugging purposes, print information about a token.
Definition: TGHtmlParse.cxx:1339
HTML_NewLine
#define HTML_NewLine
Definition: TGHtml.h:275
Html_COMMENT
@ Html_COMMENT
Definition: TGHtmlTokens.h:100
O_HtmlHr
#define O_HtmlHr
Definition: TGHtml.h:859
Html_STYLE
@ Html_STYLE
Definition: TGHtmlTokens.h:193
Html_Text
@ Html_Text
Definition: TGHtmlTokens.h:68
SHtmlStyle_t::fFont
unsigned int fFont
Definition: TGHtml.h:144
TGHtmlScript
Definition: TGHtml.h:678
TGHtmlElement::fOffs
int fOffs
Definition: TGHtml.h:268
O_HtmlListStart
#define O_HtmlListStart
Definition: TGHtml.h:855
Html_A
@ Html_A
Definition: TGHtmlTokens.h:72
TGHtmlTable
Definition: TGHtml.h:353
TGHtmlElement::fFlags
Html_u8_t fFlags
Definition: TGHtml.h:265
TGHtml::GetMarkupMap
SHtmlTokenMap_t * GetMarkupMap(int n)
Returns token map at location n.
Definition: TGHtmlParse.cxx:1461
Html_Unknown
@ Html_Unknown
Definition: TGHtmlTokens.h:70
gEscSequences
static struct SgEsc_t gEscSequences[]
Definition: TGHtmlParse.cxx:75
Html_Space
@ Html_Space
Definition: TGHtmlTokens.h:69
BatchHelpers::block
constexpr size_t block
Definition: BatchHelpers.h:29
TGHtml::AppendElement
void AppendElement(TGHtmlElement *pElem)
Append the given TGHtmlElement to the tokenizers list of elements.
Definition: TGHtmlParse.cxx:480
UNTESTED
#define UNTESTED
Definition: TGHtml.h:64
Html_XMP
@ Html_XMP
Definition: TGHtmlTokens.h:219
EscInit
static void EscInit()
Definition: TGHtmlParse.cxx:243
v
@ v
Definition: rootcling_impl.cxx:3635
text
TText * text
Definition: entrylist_figure1.C:10
TGHtmlCell
Definition: TGHtml.h:380
O_HtmlRef
#define O_HtmlRef
Definition: TGHtml.h:853
Html_EndLI
@ Html_EndLI
Definition: TGHtmlTokens.h:149
TGHtml::GetTokenName
char * GetTokenName(TGHtmlElement *p)
Returns token name of html element p.
Definition: TGHtmlParse.cxx:1424
TGHtml::InsertToken
TGHtmlElement * InsertToken(TGHtmlElement *pToken, char *zType, char *zArgs, int offs)
This routine takes a text representation of a token, converts it into an TGHtmlElement object and ins...
Definition: TGHtmlParse.cxx:1128
Html_NOFRAMES
@ Html_NOFRAMES
Definition: TGHtmlTokens.h:165
TGHtml::TypeToName
const char * TypeToName(int type)
Convert a type into a symbolic name.
Definition: TGHtmlParse.cxx:1326
TGHtml::AppendArglist
void AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
Append all the arguments of the given markup to the given TGString.
Definition: TGHtmlParse.cxx:1409
TGHtmlElement::fStyle
SHtmlStyle_t fStyle
Definition: TGHtml.h:263
TGHtml::fPLast
TGHtmlElement * fPLast
Definition: TGHtml.h:1129
HTML_MARKUP_COUNT
#define HTML_MARKUP_COUNT
Definition: TGHtmlTokens.h:199
SHtmlTokenMap_t::fType
Html_16_t fType
Definition: TGHtml.h:841
O_HtmlInput
#define O_HtmlInput
Definition: TGHtml.h:857
TGHtml::fIPlaintext
int fIPlaintext
Definition: TGHtml.h:1175
TGHtmlElement::fPPrev
TGHtmlElement * fPPrev
Definition: TGHtml.h:262
ToLower
void ToLower(char *z)
Convert a string to all lower-case letters.
Definition: TGHtmlParse.cxx:541
TGHtmlRef
Definition: TGHtml.h:404
EscHash
static int EscHash(const char *zName)
Definition: TGHtmlParse.cxx:198
HtmlHashInit
static void HtmlHashInit(void)
Definition: TGHtmlParse.cxx:463
Html_EndOL
@ Html_EndOL
Definition: TGHtmlTokens.h:170
O_HtmlMapArea
#define O_HtmlMapArea
Definition: TGHtml.h:862
O_HtmlLi
#define O_HtmlLi
Definition: TGHtml.h:854
TGHtmlSpaceElement::fW
Html_16_t fW
Definition: TGHtml.h:308
TGHtmlSpaceElement
Definition: TGHtml.h:306
TGHtml::TextInsertCmd
int TextInsertCmd(int argc, char **argv)
Insert text into text token, or break token into two text tokens.
Definition: TGHtmlParse.cxx:1199
TGHtmlAnchor
Definition: TGHtml.h:662
TGHtmlElement::fType
Html_u8_t fType
Definition: TGHtml.h:264
h
#define h(i)
Definition: RSha256.hxx:124
TGHtmlInput
Definition: TGHtml.h:581
O_HtmlCell
#define O_HtmlCell
Definition: TGHtml.h:851
Html_LISTING
@ Html_LISTING
Definition: TGHtmlTokens.h:151
TGHtml::fICol
int fICol
Definition: TGHtml.h:1173
TString::Append
TString & Append(const char *cs)
Definition: TString.h:564
gApMap
static SHtmlTokenMap_t * gApMap[HTML_MARKUP_HASH_SIZE]
Definition: TGHtmlParse.cxx:401
TGHtml.h
ESC_HASH_SIZE
#define ESC_HASH_SIZE
Definition: TGHtmlParse.cxx:183
TGHtmlTextElement::fZText
char * fZText
Definition: TGHtml.h:300
TGHtmlImageMarkup
Definition: TGHtml.h:534
Html_Block
@ Html_Block
Definition: TGHtmlTokens.h:71
TGHtmlMarkupElement
Definition: TGHtml.h:323
TGHtmlMarkupElement::fArgv
char ** fArgv
Definition: TGHtml.h:335
SHtmlStyle_t::fFlags
unsigned int fFlags
Definition: TGHtml.h:150
O_HtmlAnchor
#define O_HtmlAnchor
Definition: TGHtml.h:860
Html_PLAINTEXT
@ Html_PLAINTEXT
Definition: TGHtmlTokens.h:177
TGHtmlScript::fNStart
int fNStart
Definition: TGHtml.h:683
TGHtmlTextElement
Definition: TGHtml.h:285
TGHtml::fIdind
int fIdind
Definition: TGHtml.h:1270
SHtmlTokenMap_t::fZName
const char * fZName
Definition: TGHtml.h:840
O_HtmlForm
#define O_HtmlForm
Definition: TGHtml.h:858
TGHtml::fHasFrames
int fHasFrames
Definition: TGHtml.h:1244
TGHtml::fNComplete
int fNComplete
Definition: TGHtml.h:1171
TGHtmlTokens.h
TGHtml::Tokenize
int Tokenize()
Process as much of the input HTML as possible.
Definition: TGHtmlParse.cxx:563
HtmlMarkupMap
SHtmlTokenMap_t HtmlMarkupMap[]
Definition: TGHtmlTokenMap.cxx:33
TGHtml::fNAlloc
int fNAlloc
Definition: TGHtml.h:1170
TGHtmlElement::fCount
Html_16_t fCount
Definition: TGHtml.h:266
TGHtml::fNToken
int fNToken
Definition: TGHtml.h:1130
HtmlHash
static int HtmlHash(const char *zName)
Definition: TGHtmlParse.cxx:411
TGHtml::fNText
int fNText
Definition: TGHtml.h:1169
sum
static long int sum(long int i)
Definition: Factory.cxx:2272
Html_TABLE
@ Html_TABLE
Definition: TGHtmlTokens.h:198
TGHtmlScript::fNScript
int fNScript
Definition: TGHtml.h:684
SHtmlTokenMap_t
Definition: TGHtml.h:839
TGHtml::PrintList
void PrintList(TGHtmlElement *first, TGHtmlElement *last)
Print a list of tokens.
Definition: TGHtmlParse.cxx:1524
TGHtml::AddStyle
void AddStyle(TGHtmlElement *p)
This routine adds information to the input texts that doesn't change when the display is resized or w...
Definition: TGHtmlSizer.cxx:216
Html_NOSCRIPT
@ Html_NOSCRIPT
Definition: TGHtmlTokens.h:167
NextColumn
static int NextColumn(int iCol, char c)
Compute the new column index following the given character.
Definition: TGHtmlParse.cxx:528
Html_TEXTAREA
@ Html_TEXTAREA
Definition: TGHtmlTokens.h:202
TGHtml::fZText
char * fZText
Definition: TGHtml.h:1168
Html_EndCOMMENT
@ Html_EndCOMMENT
Definition: TGHtmlTokens.h:101
TGHtmlHr
Definition: TGHtml.h:648
TGHtml::ProcessToken
virtual int ProcessToken(TGHtmlElement *, const char *, int)
Definition: TGHtml.h:927
Html_EndUL
@ Html_EndUL
Definition: TGHtmlTokens.h:215
gApEscHash
static struct SgEsc_t * gApEscHash[ESC_HASH_SIZE]
Definition: TGHtmlParse.cxx:192
TGHtml::NameToType
int NameToType(char *zType)
Convert a markup name into a type integer.
Definition: TGHtmlParse.cxx:1317
xmlio::cnt
const char * cnt
Definition: TXMLSetup.cxx:81
mxARG
#define mxARG
TGHtmlElement::fElId
int fElId
Definition: TGHtml.h:267
Html_LI
@ Html_LI
Definition: TGHtmlTokens.h:148
type
int type
Definition: TGX11.cxx:121
TGHtmlForm
Definition: TGHtml.h:633
O_HtmlTable
#define O_HtmlTable
Definition: TGHtml.h:852
HTML_MARKUP_HASH_SIZE
#define HTML_MARKUP_HASH_SIZE
Definition: TGHtmlTokens.h:200
TGHtmlMapArea
Definition: TGHtml.h:467
TGString
Definition: TGString.h:30
TGHtmlListStart
Definition: TGHtml.h:450
TGHtml::AddFormInfo
void AddFormInfo(TGHtmlElement *p)
Add the DOM control information for form elements.
Definition: TGHtmlForm.cxx:565
TGHtml::fAddEndTags
int fAddEndTags
Definition: TGHtml.h:1245
TGHtmlBlock
Definition: TGHtml.h:710
SHtmlStyle_t::fAlign
unsigned int fAlign
Definition: TGHtml.h:147
gAcMsChar
static char gAcMsChar[]
Definition: TGHtmlParse.cxx:269
O_HtmlScript
#define O_HtmlScript
Definition: TGHtml.h:861
int
TGHtml::fPFirst
TGHtmlElement * fPFirst
Definition: TGHtml.h:1128
TGHtml::AppToken
void AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
Insert token pNew before token p.
Definition: TGHtmlParse.cxx:496