Logo ROOT  
Reference Guide
TGHtmlParse.cxx
Go to the documentation of this file.
1// $Id: TGHtmlParse.cxx,v 1.1 2007/05/04 17:07:01 brun Exp $
2// Author: Valeriy Onuchin 03/05/2007
3
4/*************************************************************************
5 * Copyright (C) 1995-2001, Rene Brun, Fons Rademakers and Reiner Rohlfs *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/**************************************************************************
13
14 HTML widget for xclass. Based on tkhtml 1.28
15 Copyright (C) 1997-2000 D. Richard Hipp <drh@acm.org>
16 Copyright (C) 2002-2003 Hector Peraza.
17
18 This library is free software; you can redistribute it and/or
19 modify it under the terms of the GNU Library General Public
20 License as published by the Free Software Foundation; either
21 version 2 of the License, or (at your option) any later version.
22
23 This library is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 Library General Public License for more details.
27
28 You should have received a copy of the GNU Library General Public
29 License along with this library; if not, write to the Free
30 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
31
32**************************************************************************/
33
34// A tokenizer that converts raw HTML into a linked list of HTML elements.
35
36#include <cstring>
37#include <cstdlib>
38#include <cstdio>
39#include <cctype>
40
41#include "TGHtml.h"
42#include "TGHtmlTokens.h"
43#include "strlcpy.h"
44#include "snprintf.h"
45
46//----------------------------------------------------------------------
47
49
50
51/****************** Begin Escape Sequence Translator *************/
52
53// The next section of code implements routines used to translate
54// the '&' escape sequences of SGML to individual characters.
55// Examples:
56//
57// &amp; &
58// &lt; <
59// &gt; >
60// &nbsp; nonbreakable space
61//
62
// One row of the SGML '&name;' escape table. Rows are chained together
// through fPNext when their names land in the same hash slot.

struct SgEsc_t {
   const char     *fZName;     // Escape name without '&' and ';'. ex: "amp"
   char            fValue[8];  // NUL-terminated replacement text. ex: "&"
   struct SgEsc_t *fPNext;     // Next row whose fZName has the same hash
};

// Every escape sequence the translator knows about. To teach it a new
// name, append a row here; the hash table size below follows the row
// count automatically.

static struct SgEsc_t gEscSequences[] = {
   // Markup-significant characters and the space used for &nbsp;
   { "quot",   "\"",   0 },
   { "amp",    "&",    0 },
   { "lt",     "<",    0 },
   { "gt",     ">",    0 },
   { "nbsp",   " ",    0 },

   // Single-byte values \241 - \277: punctuation and symbols
   { "iexcl",  "\241", 0 },
   { "cent",   "\242", 0 },
   { "pound",  "\243", 0 },
   { "curren", "\244", 0 },
   { "yen",    "\245", 0 },
   { "brvbar", "\246", 0 },
   { "sect",   "\247", 0 },
   { "uml",    "\250", 0 },
   { "copy",   "\251", 0 },
   { "ordf",   "\252", 0 },
   { "laquo",  "\253", 0 },
   { "not",    "\254", 0 },
   { "shy",    "\255", 0 },
   { "reg",    "\256", 0 },
   { "macr",   "\257", 0 },
   { "deg",    "\260", 0 },
   { "plusmn", "\261", 0 },
   { "sup2",   "\262", 0 },
   { "sup3",   "\263", 0 },
   { "acute",  "\264", 0 },
   { "micro",  "\265", 0 },
   { "para",   "\266", 0 },
   { "middot", "\267", 0 },
   { "cedil",  "\270", 0 },
   { "sup1",   "\271", 0 },
   { "ordm",   "\272", 0 },
   { "raquo",  "\273", 0 },
   { "frac14", "\274", 0 },
   { "frac12", "\275", 0 },
   { "frac34", "\276", 0 },
   { "iquest", "\277", 0 },

   // Single-byte values \300 - \337: upper-case letters, "times", "szlig"
   { "Agrave", "\300", 0 },
   { "Aacute", "\301", 0 },
   { "Acirc",  "\302", 0 },
   { "Atilde", "\303", 0 },
   { "Auml",   "\304", 0 },
   { "Aring",  "\305", 0 },
   { "AElig",  "\306", 0 },
   { "Ccedil", "\307", 0 },
   { "Egrave", "\310", 0 },
   { "Eacute", "\311", 0 },
   { "Ecirc",  "\312", 0 },
   { "Euml",   "\313", 0 },
   { "Igrave", "\314", 0 },
   { "Iacute", "\315", 0 },
   { "Icirc",  "\316", 0 },
   { "Iuml",   "\317", 0 },
   { "ETH",    "\320", 0 },
   { "Ntilde", "\321", 0 },
   { "Ograve", "\322", 0 },
   { "Oacute", "\323", 0 },
   { "Ocirc",  "\324", 0 },
   { "Otilde", "\325", 0 },
   { "Ouml",   "\326", 0 },
   { "times",  "\327", 0 },
   { "Oslash", "\330", 0 },
   { "Ugrave", "\331", 0 },
   { "Uacute", "\332", 0 },
   { "Ucirc",  "\333", 0 },
   { "Uuml",   "\334", 0 },
   { "Yacute", "\335", 0 },
   { "THORN",  "\336", 0 },
   { "szlig",  "\337", 0 },

   // Single-byte values \340 - \377: lower-case letters and "divide"
   { "agrave", "\340", 0 },
   { "aacute", "\341", 0 },
   { "acirc",  "\342", 0 },
   { "atilde", "\343", 0 },
   { "auml",   "\344", 0 },
   { "aring",  "\345", 0 },
   { "aelig",  "\346", 0 },
   { "ccedil", "\347", 0 },
   { "egrave", "\350", 0 },
   { "eacute", "\351", 0 },
   { "ecirc",  "\352", 0 },
   { "euml",   "\353", 0 },
   { "igrave", "\354", 0 },
   { "iacute", "\355", 0 },
   { "icirc",  "\356", 0 },
   { "iuml",   "\357", 0 },
   { "eth",    "\360", 0 },
   { "ntilde", "\361", 0 },
   { "ograve", "\362", 0 },
   { "oacute", "\363", 0 },
   { "ocirc",  "\364", 0 },
   { "otilde", "\365", 0 },
   { "ouml",   "\366", 0 },
   { "divide", "\367", 0 },
   { "oslash", "\370", 0 },
   { "ugrave", "\371", 0 },
   { "uacute", "\372", 0 },
   { "ucirc",  "\373", 0 },
   { "uuml",   "\374", 0 },
   { "yacute", "\375", 0 },
   { "thorn",  "\376", 0 },
   { "yuml",   "\377", 0 },
};


// Number of slots in the escape hash table: the table's row count plus 7.
// For best results this should be a prime number of about the same size
// as the number of escape sequences known to the system.

#define ESC_HASH_SIZE (sizeof(gEscSequences) / sizeof(gEscSequences[0]) + 7)
184
185
186// The hash table
187//
188// If the name of an escape sequence hashes to the value H, then
189// gApEscHash[H] will point to a linked list of Esc structures, one of
190// which will be the Esc structure for that escape sequence.
191
193
194
195// Hash a escape sequence name. The value returned is an integer
196// between 0 and ESC_HASH_SIZE-1, inclusive.
197
198static int EscHash(const char *zName) {
199 int h = 0; // The hash value to be returned
200 char c; // The next character in the name being hashed
201
202 while ((c = *zName) != 0) {
203 h = h<<5 ^ h ^ c;
204 zName++;
205 }
206 if (h < 0) h = -h;
207
208 return h % ESC_HASH_SIZE;
209}
210
#ifdef TEST
// Compute the longest and average collision chain length for the
// escape sequence hash table and print them on stdout.
//
// All ESC_HASH_SIZE slots are inspected. Scanning only as many slots as
// there are table rows would leave the trailing slots out of the
// statistics.

static void EscHashStats()
{
   const int nSlots = int(ESC_HASH_SIZE);
   int total = 0;     // Sum of all chain lengths
   int longest = 0;   // Longest chain seen so far
   int used = 0;      // Number of non-empty slots

   for (int slot = 0; slot < nSlots; slot++) {
      int len = 0;
      for (const struct SgEsc_t *p = gApEscHash[slot]; p; p = p->fPNext) {
         ++len;
      }
      if (len > 0) used++;
      total += len;
      if (len > longest) longest = len;
   }

   // Report an average of 0 rather than dividing 0 by 0 when the table
   // has not been filled yet.
   const double avg = used ? (double)total / (double)used : 0.0;

   printf("Longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
          longest, avg, nSlots, nSlots - used,
          100.0 * (nSlots - used) / (double)nSlots);
}
#endif
240
241// Initialize the escape sequence hash table
242
243static void EscInit() {
244 int i; /* For looping thru the list of escape sequences */
245 int h; /* The hash on a sequence */
246
247 for (i = 0; i < int(sizeof(gEscSequences) / sizeof(gEscSequences[i])); i++) {
248/* #ifdef XCLASS_UTF_MAX */
249#if 0
250 int c = gEscSequences[i].value[0];
251 xclass::UniCharToUtf(c, gEscSequences[i].value);
252 }
253#endif
257 }
258#ifdef TEST
259 EscHashStats();
260#endif
261}
262
263
// This table translates the non-standard microsoft characters between 0x80
// and 0x9f into plain ASCII so that the characters will be visible on Unix
// systems. Care is taken to translate the characters into values less than
// 0x80, to avoid UTF-8 problems.
//
// Index with (code & 0x1f). The table is only ever read, so it is const.

static const char gAcMsChar[] = {
   /* 0x80 */ 'C',
   /* 0x81 */ ' ',
   /* 0x82 */ ',',
   /* 0x83 */ 'f',
   /* 0x84 */ '"',
   /* 0x85 */ '.',
   /* 0x86 */ '*',
   /* 0x87 */ '*',
   /* 0x88 */ '^',
   /* 0x89 */ '%',
   /* 0x8a */ 'S',
   /* 0x8b */ '<',
   /* 0x8c */ 'O',
   /* 0x8d */ ' ',
   /* 0x8e */ 'Z',
   /* 0x8f */ ' ',
   /* 0x90 */ ' ',
   /* 0x91 */ '\'',
   /* 0x92 */ '\'',
   /* 0x93 */ '"',
   /* 0x94 */ '"',
   /* 0x95 */ '*',
   /* 0x96 */ '-',
   /* 0x97 */ '-',
   /* 0x98 */ '~',
   /* 0x99 */ '@',
   /* 0x9a */ 's',
   /* 0x9b */ '>',
   /* 0x9c */ 'o',
   /* 0x9d */ ' ',
   /* 0x9e */ 'z',
   /* 0x9f */ 'Y',
};
303
304
305////////////////////////////////////////////////////////////////////////////////
306/// Translate escape sequences in the string "z". "z" is overwritten
307/// with the translated sequence.
308///
309/// Unrecognized escape sequences are unaltered.
310///
311/// Example:
312///
313/// input = "AT&amp;T &gt MCI"
314/// output = "AT&T > MCI"
315
317{
318 int from; // Read characters from this position in z[]
319 int to; // Write characters into this position in z[]
320 int h; // A hash on the escape sequence
321 struct SgEsc_t *p; // For looping down the escape sequence collision chain
322 static int isInit = 0; // True after initialization
323
324 from = to = 0;
325 if (!isInit) {
326 EscInit();
327 isInit = 1;
328 }
329 while (z[from]) {
330 if (z[from] == '&') {
331 if (z[from+1] == '#') {
332 int i = from + 2;
333 int v = 0;
334 while (isdigit(z[i])) {
335 v = v*10 + z[i] - '0';
336 i++;
337 }
338 if (z[i] == ';') { i++; }
339
340 // Translate the non-standard microsoft characters in the range of
341 // 0x80 to 0x9f into something we can see.
342
343 if (v >= 0x80 && v < 0xa0) {
344 v = gAcMsChar[v & 0x1f];
345 }
346
347 // Put the character in the output stream in place of the "&#000;".
348 // How we do this depends on whether or not we are using UTF-8.
349
350 z[to++] = v;
351 from = i;
352 } else {
353 int i = from+1;
354 int c;
355 while (z[i] && isalnum(z[i])) ++i;
356 c = z[i];
357 z[i] = 0;
358 h = EscHash(&z[from+1]);
359 p = gApEscHash[h];
360 while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
361 z[i] = c;
362 if (p) {
363 int j;
364 for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
365 from = i;
366 if (c == ';') from++;
367 } else {
368 z[to++] = z[from++];
369 }
370 }
371
372 // Look for the non-standard microsoft characters between 0x80 and 0x9f
373 // and translate them into printable ASCII codes. Separate algorithms
374 // are required to do this for plain ascii and for utf-8.
375
376 } else if (((unsigned char) z[from]) >= 0x80 &&
377 ((unsigned char) z[from]) < 0xa0) {
378 z[to++] = gAcMsChar[z[from++] & 0x1f];
379 } else {
380 z[to++] = z[from++];
381 }
382 }
383 z[to] = 0;
384}
385
386/******************* End Escape Sequence Translator ***************/
387
388/******************* Begin HTML tokenizer code *******************/
389
390// The following variable becomes TRUE when the markup hash table
391// (stored in HtmlMarkupMap[]) is initialized.
392
393static int gIsInit = 0;
394
395// The hash table for HTML markup names.
396//
397// If an HTML markup name hashes to H, then gApMap[H] will point to
398// a linked list of sgMap structure, one of which will describe the
399// particular markup (if it exists).
400
402
403// Hash a markup name
404//
405// HTML markup is case insensitive, so this function will give the
406// same hash regardless of the case of the markup name.
407//
408// The value returned is an integer between 0 and HTML_MARKUP_HASH_SIZE-1,
409// inclusive.
410
411static int HtmlHash(const char *zName) {
412 int h = 0;
413 char c;
414
415 while ((c = *zName) != 0) {
416 if (isupper(c)) { // do we have to check for this??????
417 c = tolower(c);
418 }
419 h = h<<5 ^ h ^ c;
420 zName++;
421 }
422 if (h < 0) {
423 h = -h;
424 }
425
426 return h % HTML_MARKUP_HASH_SIZE;
427}
428
429
#ifdef TEST
// Compute the longest and average collision chain length for the
// markup hash table and print them on stdout.
//
// The scan covers the HTML_MARKUP_HASH_SIZE slots of gApMap[], not
// HTML_MARKUP_COUNT: the latter is the number of markup names, which
// need not equal the number of slots.

static void HtmlHashStats() {
   const int nSlots = int(HTML_MARKUP_HASH_SIZE);
   int total = 0;     // Sum of all chain lengths
   int longest = 0;   // Longest chain seen so far
   int used = 0;      // Number of non-empty slots

   for (int slot = 0; slot < nSlots; slot++) {
      int len = 0;
      for (const SHtmlTokenMap_t *p = gApMap[slot]; p; p = p->fPCollide) {
         len++;
      }
      if (len > 0) used++;
      total += len;
      if (len > longest) longest = len;
   }

   // Report an average of 0 rather than dividing 0 by 0 on an empty table.
   const double avg = used ? (double)total / (double)used : 0.0;

   printf("longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
          longest, avg, nSlots, nSlots - used,
          100.0 * (nSlots - used) / (double)nSlots);
}
#endif
459
460
461// Initialize the escape sequence hash table
462
463static void HtmlHashInit(void){
464 int i;
465 int h; // The hash on a markup name
466
467 for (i = 0; i < HTML_MARKUP_COUNT; i++) {
468 h = HtmlHash(HtmlMarkupMap[i].fZName);
470 gApMap[h] = &HtmlMarkupMap[i];
471 }
472#ifdef TEST
473 HtmlHashStats();
474#endif
475}
476
477////////////////////////////////////////////////////////////////////////////////
478/// Append the given TGHtmlElement to the tokenizer's list of elements
479
481{
482 pElem->fPNext = 0;
483 pElem->fPPrev = fPLast;
484 if (fPFirst == 0) {
485 fPFirst = pElem;
486 } else {
487 fPLast->fPNext = pElem;
488 }
489 fPLast = pElem;
490 fNToken++;
491}
492
493////////////////////////////////////////////////////////////////////////////////
494/// Insert token pNew before token p
495
497{
498 if (offs < 0) {
499 if (p) {
500 offs = p->fOffs;
501 } else {
502 offs = fNText;
503 }
504 }
505
506////if (p) { pNew->fStyle = p->fStyle; pNew->fFlags = p->fFlags; }
507
508// pNew->fCount = 0;
509 pNew->fOffs = offs;
510 pNew->fPNext = p;
511 if (p) {
512 pNew->fElId = p->fElId;
513 p->fElId = ++fIdind;
514 pNew->fPPrev = p->fPPrev;
515 if (p->fPPrev) p->fPPrev->fPNext = pNew;
516 if (fPFirst == p) fPFirst = pNew;
517 p->fPPrev = pNew;
518 } else {
519 pNew->fElId = ++fIdind;
520 AppendElement(pNew);
521 }
522 fNToken++;
523}
524
////////////////////////////////////////////////////////////////////////////////
/// Return the column index that follows column iCol once character c has
/// been consumed: a newline goes back to column 0, a tab advances to the
/// next multiple of 8, anything else advances by one.

static int NextColumn(int iCol, char c)
{
   if (c == '\n') return 0;
   if (c == '\t') return (iCol | 7) + 1;   // round up to the next tab stop
   return iCol + 1;
}
537
////////////////////////////////////////////////////////////////////////////////
/// Convert a string to all lower-case letters, in place.
///
/// z must be a writable, NUL-terminated string. Each byte is handed to
/// isupper()/tolower() as an unsigned char: those functions are only
/// defined for EOF and unsigned char values, and a byte >= 0x80 held in a
/// signed plain char would otherwise be passed as a negative number.

void ToLower(char *z)
{
   for (; *z; z++) {
      const unsigned char uc = (unsigned char)*z;
      if (isupper(uc)) *z = (char)tolower(uc);
   }
}
548
549////////////////////////////////////////////////////////////////////////////////
550/// Process as much of the input HTML as possible. Construct new
551/// TGHtmlElement objects and append them to the list. Return
552/// the number of characters actually processed.
553///
554/// This routine may invoke a callback procedure which could delete
555/// the HTML widget.
556///
557/// This routine is not reentrant for the same HTML widget. To
558/// prevent reentrancy (during a callback), the p->fICol field is
559/// set to a negative number. This is a flag to future invocations
560/// not to re-enter this routine. The p->fICol field is restored
561/// before exiting, of course.
562
564{
565 char *z; // The input HTML text
566 int c; // The next character of input
567 int n; // Number of characters processed so far
568 int inpCol; // Column of input
569 int i, j; // Loop counters
570 int h; // Result from HtmlHash()
571 TGHtmlElement *pElem;// A new HTML element
572 int selfClose; // True for content free elements. Ex: <br/>
573 int argc; // The number of arguments on a markup
574 SHtmlTokenMap_t *pMap; // For searching the markup name hash table
575# define mxARG 200 // Maximum number of parameters in a single markup
576 char *argv[mxARG]; // Pointers to each markup argument.
577 int arglen[mxARG]; // Length of each markup argument
578 //int rl, ol;
579#ifdef pIsInMeachnism
580 int pIsInScript = 0;
581 int pIsInNoScript = 0;
582 int pIsInNoFrames = 0;
583#endif
584 int sawdot = 0;
585 int inLi = 0;
586
587 static char null[1] = { "" };
588
589 inpCol = fICol;
590 n = fNComplete;
591 z = fZText;
592 if (inpCol < 0) return n; // Prevents recursion
593 fICol = -1;
594 pElem = 0;
595
596 while ((c = z[n]) != 0) {
597
598 sawdot--;
599 if (c == -64 && z[n+1] == -128) {
600 n += 2;
601 continue;
602 }
603
604 if (fPScript) {
605
606 // We are in the middle of <SCRIPT>...</SCRIPT>. Just look for
607 // the </SCRIPT> markup. (later:) Treat <STYLE>...</STYLE> the
608 // same way.
609
610 TGHtmlScript *pScr = fPScript;
611 const char *zEnd;
612 int nEnd;
613 //int curline, curch, curlast = n;
614 int sqcnt;
615 if (pScr->fType == Html_SCRIPT) {
616 zEnd = "</script>";
617 nEnd = 9;
618 } else if (pScr->fType == Html_NOSCRIPT) {
619 zEnd = "</noscript>";
620 nEnd = 11;
621 } else if (pScr->fType == Html_NOFRAMES) {
622 zEnd = "</noframes>";
623 nEnd = 11;
624 } else {
625 zEnd = "</style>";
626 nEnd = 8;
627 }
628 if (pScr->fNStart < 0) {
629 pScr->fNStart = n;
630 pScr->fNScript = 0;
631 }
632 sqcnt = 0;
633 for (i = n /*pScr->fNStart + pScr->fNScript*/; z[i]; i++) {
634 if (z[i] == '\'' || z[i] == '"') {
635 sqcnt++; // Skip if odd # quotes
636 } else if (z[i] == '\n') {
637 sqcnt = 0;
638 }
639 if (z[i] == '<' && z[i+1] == '/' &&
640 strncasecmp(&z[i], zEnd, nEnd) == 0) {
641 if (zEnd[3] == 'c' && ((sqcnt % 2) == 1)) continue;
642 pScr->fNScript = i - n;
643 fPScript = 0;
644 n = i + nEnd;
645 break;
646 }
647 }
648 if (z[i] == 0) goto incomplete;
649 if (fPScript) {
650 pScr->fNScript = i - n;
651 n = i;
652 }
653 else {
654#ifdef pIsInMeachnism
655 // If there is a script, execute it now and insert any output
656 // to the html stream for parsing as html. (ie. client side scripting)
657
658 if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
659
660 //for (curch = 0, curline = 1; curch <= curlast; curch++)
661 // if (z[curch] == '\n') curline++;
662
663 // arglist in pElem and text pointers in pScr?
664 // Inline scripts can contain unmatched brackets :-)
665 //char varind[50];
666 //sprintf(varind, "HtmlScrVar%d", p->varind++);
667 //char savech = fZText[pScr->fNStart + pScr->fNScript];
668 //fZText[pScr->fNStart + pScr->fNScript] = 0;
669 //char *scriptBody = StrDup(fZText[pScr->fNStart]);
670 //fZText[pScr->fNStart + pScr->fNScript] = savech;
671 AdvanceLayout(p);
672 inParse++;
673 char *result = ProcessScript((TGHtmlScript *) pElem); // pElem or pScr??
674 inParse--;
675 if (result) {
676 ol = fNAlloc;
677 rl = strlen(result);
678 fNAlloc += rl;
679 z = fZText = HtmlRealloc(z, ol+rl);
680 memmove(z + n + rl, z+n, ol - n);
681 memmove(z + n, result, rl);
682 }
683 }
684 pIsInScript = 0;
685 pIsInNoScript = 0;
686 pIsInNoFrames = 0;
687#endif
688 }
689 //continue;
690
691 }
692 else if (isspace((unsigned char)c)) {
693
694 // White space
695 for (i = 0;
696 (c = z[n+i]) != 0 && isspace((unsigned char)c) && c != '\n' && c != '\r';
697 i++) { }
698 if (c == '\r' && z[n+i+1] == '\n') ++i;
699#if 0 // this is certainly NOT OK, since it alters pre-formatted text
700 if (sawdot == 1) {
701 pElem = new TGHtmlTextElement(2);
702 strcpy(((TGHtmlTextElement *)pElem)->fZText, " ");
703 pElem->fElId = ++fIdind;
704 pElem->fOffs = n;
705 pElem->fCount = 1;
706 AppendElement(pElem);
707 }
708#endif
709 pElem = new TGHtmlSpaceElement;
710 if (pElem == 0) goto incomplete;
711 ((TGHtmlSpaceElement *)pElem)->fW = 0;
712 pElem->fOffs = n;
713 pElem->fElId = ++fIdind;
714 if (c == '\n' || c == '\r') {
715 pElem->fFlags = HTML_NewLine;
716 pElem->fCount = 1;
717 i++;
718 inpCol = 0;
719 } else {
720 int iColStart = inpCol;
721 pElem->fFlags = 0;
722 for (j = 0; j < i; j++) {
723 inpCol = NextColumn(inpCol, z[n+j]);
724 }
725 pElem->fCount = inpCol - iColStart;
726 }
727 AppendElement(pElem);
728 n += i;
729
730 }
731 else if (c != '<' || fIPlaintext != 0 ||
732 (!isalpha(z[n+1]) && z[n+1] != '/' && z[n+1] != '!' && z[n+1] != '?')) {
733
734 // Ordinary text
735 for (i = 1; (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '<'; i++) {}
736 if (z[n+i-1] == '.' || z[n+i-1] == '!' || z[n+i-1] == '?') sawdot = 2;
737 if (c == 0) goto incomplete;
738 if (fIPlaintext != 0 && z[n] == '<') {
739 switch (fIPlaintext) {
740 case Html_LISTING:
741 if (i >= 10 && strncasecmp(&z[n], "</listing>", 10) == 0) {
742 fIPlaintext = 0;
743 goto doMarkup;
744 }
745 break;
746
747 case Html_XMP:
748 if (i >= 6 && strncasecmp(&z[n], "</xmp>", 6) == 0) {
749 fIPlaintext = 0;
750 goto doMarkup;
751 }
752 break;
753
754 case Html_TEXTAREA:
755 if (i >= 11 && strncasecmp(&z[n], "</textarea>", 11) == 0) {
756 fIPlaintext = 0;
757 goto doMarkup;
758 }
759 break;
760
761 default:
762 break;
763 }
764 }
765 pElem = new TGHtmlTextElement(i);
766 if (pElem == 0) goto incomplete;
767 TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
768 tpElem->fElId = ++fIdind;
769 tpElem->fOffs = n;
770 strncpy(tpElem->fZText, &z[n], i);
771 tpElem->fZText[i] = 0;
772 AppendElement(pElem);
773 if (fIPlaintext == 0 || fIPlaintext == Html_TEXTAREA) {
775 }
776 pElem->fCount = (Html_16_t) strlen(tpElem->fZText);
777 n += i;
778 inpCol += i;
779
780 } else if (strncmp(&z[n], "<!--", 4) == 0) {
781
782 // An HTML comment. Just skip it.
783 for (i = 4; z[n+i]; i++) {
784 if (z[n+i] == '-' && strncmp(&z[n+i], "-->", 3) == 0) break;
785 }
786 if (z[n+i] == 0) goto incomplete;
787
788 pElem = new TGHtmlTextElement(i);
789 if (pElem == 0) goto incomplete;
790 TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
791 tpElem->fType = Html_COMMENT;
792 tpElem->fElId = ++fIdind;
793 tpElem->fOffs = n;
794 strncpy(tpElem->fZText, &z[n+4], i-4);
795 tpElem->fZText[i-4] = 0;
796 tpElem->fCount = 0;
797 AppendElement(pElem);
798
799 pElem = new TGHtmlElement(Html_EndCOMMENT);
800 AppToken(pElem, 0, n+4);
801
802 for (j = 0; j < i+3; j++) {
803 inpCol = NextColumn(inpCol, z[n+j]);
804 }
805 n += i + 3;
806
807 }
808 else {
809
810 // Markup.
811 //
812 // First get the name of the markup
813doMarkup:
814 argc = 1;
815 argv[0] = &z[n+1];
816 for (i = 1;
817 (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '>' && (i < 2 || c != '/');
818 i++) {}
819 arglen[0] = i - 1;
820 if (c == 0) goto incomplete;
821
822 // Now parse up the arguments
823
824 while (isspace((unsigned char)z[n+i])) ++i;
825 while ((c = z[n+i]) != 0 && c != '>' && (c != '/' || z[n+i+1] != '>')) {
826 if (argc > mxARG - 3) argc = mxARG - 3;
827 argv[argc] = &z[n+i];
828 j = 0;
829 while ((c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>' &&
830 c != '=' && (c != '/' || z[n+i+j+1] != '>')) ++j;
831 arglen[argc] = j;
832 if (c == 0) goto incomplete;
833 i += j;
834 while (isspace((unsigned char)c)) {
835 i++;
836 c = z[n+i];
837 }
838 if (c == 0) goto incomplete;
839 argc++;
840 if (c != '=') {
841 argv[argc] = null;
842 arglen[argc] = 0;
843 argc++;
844 continue;
845 }
846 i++;
847 c = z[n+i];
848 while (isspace((unsigned char)c)) {
849 i++;
850 c = z[n+i];
851 }
852 if (c == 0) goto incomplete;
853 if (c == '\'' || c == '"') {
854 int cQuote = c;
855 i++;
856 argv[argc] = &z[n+i];
857 for (j = 0; (c = z[n+i+j]) != 0 && c != cQuote; j++) {}
858 if (c == 0) goto incomplete;
859 arglen[argc] = j;
860 i += j+1;
861 } else {
862 argv[argc] = &z[n+i];
863 for (j = 0; (c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>'; j++) {}
864 if (c == 0) goto incomplete;
865 arglen[argc] = j;
866 i += j;
867 }
868 argc++;
869 while (isspace(z[n+i])) ++i;
870 }
871 if (c == '/') {
872 i++;
873 c = z[n+i];
874 selfClose = 1;
875 } else {
876 selfClose = 0;
877 }
878 if (c == 0) goto incomplete;
879 for (j = 0; j < i+1; j++) {
880 inpCol = NextColumn(inpCol, z[n+j]);
881 }
882 n += i + 1;
883
884 // Lookup the markup name in the hash table
885
886 if (!gIsInit) {
887 HtmlHashInit();
888 gIsInit = 1;
889 }
890 c = argv[0][arglen[0]];
891 argv[0][arglen[0]] = 0;
892 h = HtmlHash(argv[0]);
893 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
894 if (strcasecmp(pMap->fZName, argv[0]) == 0) break;
895 }
896 argv[0][arglen[0]] = c;
897 if (pMap == 0) continue; // Ignore unknown markup
898
899makeMarkupEntry:
900 // Construct a TGHtmlMarkupElement object for this markup.
901
902 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc, arglen, argv);
903 if (pElem == 0) goto incomplete;
904
905 pElem->fElId = ++fIdind;
906 pElem->fOffs = n;
907
908 AddFormInfo(pElem);
909
910 // The new markup has now been constructed in pElem. But before
911 // appending it to the list, check to see if there is a special
912 // handler for this markup type.
913
914 if (ProcessToken(pElem, pMap->fZName, pMap->fType)) {
915 // delete pElem;
916
917 // Tricky, tricky. The user function might have caused the p->fZText
918 // pointer to change, so renew our copy of that pointer.
919
920 z = fZText;
921 if (z == 0) {
922 n = 0;
923 inpCol = 0;
924 goto incomplete;
925 }
926 continue;
927 }
928
929 // No special handler for this markup. Just append it to the
930 // list of all tokens.
931
932 AppendElement(pElem);
933 switch (pMap->fType) {
934 case Html_TABLE:
935 break;
936
937 case Html_PLAINTEXT:
938 case Html_LISTING:
939 case Html_XMP:
940 case Html_TEXTAREA:
941 fIPlaintext = pMap->fType;
942 break;
943
944 case Html_NOFRAMES:
945 if (!fHasFrames) break;
946#ifdef pIsInMeachnism
947 pIsInNoFrames = 1;
948#endif
949 case Html_NOSCRIPT:
950 break;
951 // coverity[unreachable]
952 if (!fHasScript) break;
953#ifdef pIsInMeachnism
954 pIsInNoScript = 1;
955#endif
956 case Html_SCRIPT:
957#ifdef pIsInMeachnism
958 pIsInScript = 1;
959#endif
960 // fallthrough
961 case Html_STYLE:
962 fPScript = (TGHtmlScript *) pElem;
963 break;
964
965 case Html_LI:
966 if (!fAddEndTags) break;
967 if (inLi) {
969 AppToken(e, pElem, n);
970 } else {
971 inLi = 1;
972 }
973 break;
974
975 case Html_EndLI:
976 inLi=0;
977 break;
978
979 case Html_EndOL:
980 case Html_EndUL:
981 if (!fAddEndTags) break;
982 if (inLi) {
984 AppToken(e, pElem, n);
985 } else {
986 inLi = 0;
987 }
988 break;
989
990 default:
991 break;
992 }
993
994 // If this is self-closing markup (ex: <br/> or <img/>) then
995 // synthesize a closing token.
996
997 if (selfClose && argv[0][0] != '/' &&
998 strcmp(&pMap[1].fZName[1], pMap->fZName) == 0) {
999 selfClose = 0;
1000 pMap++;
1001 argc = 1;
1002 goto makeMarkupEntry;
1003 }
1004 }
1005 }
1006
1007incomplete:
1008 fICol = inpCol;
1009 ////fPScript = 0;
1010
1011 return n;
1012}
1013
1014/************************** End HTML Tokenizer Code ***************************/
1015
1016////////////////////////////////////////////////////////////////////////////////
1017/// Make one markup entry.
1018
1020 int arglen[], char *argv[])
1021{
1023
1024 switch (objType) {
1025 case O_HtmlCell:
1026 e = new TGHtmlCell(type, argc, arglen, argv);
1027 break;
1028
1029 case O_HtmlTable:
1030 e = new TGHtmlTable(type, argc, arglen, argv);
1031 break;
1032
1033 case O_HtmlRef:
1034 e = new TGHtmlRef(type, argc, arglen, argv);
1035 break;
1036
1037 case O_HtmlLi:
1038 e = new TGHtmlLi(type, argc, arglen, argv);
1039 break;
1040
1041 case O_HtmlListStart:
1042 e = new TGHtmlListStart(type, argc, arglen, argv);
1043 break;
1044
1045 case O_HtmlImageMarkup:
1046 e = new TGHtmlImageMarkup(type, argc, arglen, argv);
1047 break;
1048
1049 case O_HtmlInput:
1050 e = new TGHtmlInput(type, argc, arglen, argv);
1051 break;
1052
1053 case O_HtmlForm:
1054 e = new TGHtmlForm(type, argc, arglen, argv);
1055 break;
1056
1057 case O_HtmlHr:
1058 e = new TGHtmlHr(type, argc, arglen, argv);
1059 break;
1060
1061 case O_HtmlAnchor:
1062 e = new TGHtmlAnchor(type, argc, arglen, argv);
1063 break;
1064
1065 case O_HtmlScript:
1066 e = new TGHtmlScript(type, argc, arglen, argv);
1067 break;
1068
1069 case O_HtmlMapArea:
1070 e = new TGHtmlMapArea(type, argc, arglen, argv);
1071 break;
1072
1073 default:
1074 e = new TGHtmlMarkupElement(type, argc, arglen, argv);
1075 break;
1076 }
1077
1078 return e;
1079}
1080
1081////////////////////////////////////////////////////////////////////////////////
1082/// Append text to the tokenizer engine.
1083
1085{
1086 int len = strlen(text);
1087
1088 if (fNText == 0) {
1089 fNAlloc = len + 100;
1090 fZText = new char [fNAlloc];
1091 } else if (fNText + len >= fNAlloc) {
1092 fNAlloc += len + 100;
1093 char *tmp = new char[fNAlloc];
1094 strlcpy(tmp, fZText, fNAlloc);
1095 delete[] fZText;
1096 fZText = tmp;
1097 }
1098
1099 if (fZText == 0) {
1100 fNText = 0;
1101 UNTESTED;
1102 return;
1103 }
1104
1105 strlcpy(&fZText[fNText], text, fNAlloc - fNText);
1106 fNText += len;
1107 fNComplete = Tokenize();
1108}
1109
1110////////////////////////////////////////////////////////////////////////////////
1111/// This routine takes a text representation of a token, converts it into a
1112/// TGHtmlElement object and inserts it immediately prior to pToken. If pToken
1113/// is 0, then the newly created TGHtmlElement is appended.
1114///
1115/// This routine does nothing to resize, restyle, relayout or redisplay
1116/// the HTML. That is the calling routine's responsibility.
1117///
1118/// Return the new TGHtmlElement object if successful. Return zero if
1119/// zType is not a known markup name.
1120///
1121/// pToken - Insert before this. Append if pToken == 0
1122/// zType - Type of markup. Ex: "/a" or "table"
1123/// zArgs - List of arguments
1124/// offs - Calculate offset, and insert changed text into fZText!
1125
1127 char *zType, char *zArgs, int offs)
1128{
1129 SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1130 int h; // The hash on zType
1131 TGHtmlElement *pElem; // The new element
1132 //int nByte; // How many bytes to allocate
1133 //int i; // Loop counter
1134
1135 if (!gIsInit) {
1136 HtmlHashInit();
1137 gIsInit = 1;
1138 }
1139
1140 if (strcmp(zType, "Text") == 0) {
1141 pElem = new TGHtmlTextElement(zArgs ? strlen(zArgs) : 0);
1142 if (pElem == 0) return 0;
1143 if (zArgs) {
1144 // coverity[secure_coding]
1145 strcpy (((TGHtmlTextElement *)pElem)->fZText, zArgs); // NOLINT
1146 pElem->fCount = (Html_16_t) strlen(zArgs);
1147 }
1148 } else if (!strcmp(zType, "Space")) {
1149 pElem = new TGHtmlSpaceElement();
1150 if (pElem == 0) return 0;
1151 } else {
1152 h = HtmlHash(zType);
1153 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1154 if (strcasecmp(pMap->fZName, zType) == 0) break;
1155 }
1156 if (pMap == 0) return 0;
1157 if (zArgs == 0 || *zArgs == 0) {
1158 // Special case of no arguments. This is a lot easier...
1159 // well... now its the same thing!
1160 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, 1, 0, 0);
1161 if (pElem == 0) return 0;
1162 } else {
1163 // The general case. There are arguments that need to be parsed
1164 // up. This is slower, but we gotta do it.
1165 //int argc;
1166 //char **argv;
1167 //char *zBuf;
1168
1169#if 0
1170 if (!SplitList(zArgs, &argc, &argv)) return 0;
1171
1172 // shall we insert a dummy argv[0]?
1173
1174 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc/*+1??*/, 0, argv);
1175 if (pElem == 0) return 1;
1176
1177 while (--argc >= 0) if (argv[argc]) delete[] argv[argc];
1178 delete[] argv;
1179#else
1180 return 0;
1181#endif
1182 }
1183 }
1184
1185 pElem->fElId = ++fIdind;
1186
1187 AppToken(pElem, pToken, offs);
1188
1189 return pElem;
1190}
1191
1192////////////////////////////////////////////////////////////////////////////////
1193/// Insert text into text token, or break token into two text tokens.
1194/// Also, handle backspace char by deleting text.
1195/// Should also handle newline char by splitting text.
1196
1197int TGHtml::TextInsertCmd(int /*argc*/, char ** /*argv*/)
1198{
// NOTE: everything between `#if 0` and `#endif` below is compiled out, so this
// function currently ignores both of its arguments and unconditionally
// returns 1.
// NOTE(review): the disabled code uses names that are not declared anywhere in
// this block (`ins`, `text`, `p->base.id`) -- presumably it needs porting
// before the `#if 0` can be lifted; confirm before re-enabling.
1199#if 0
1200 TGHtmlElement *p, *pElem;
1201 int i, l, n = 0;
1202 int idx = 0;
1203 int ptyp = Html_Unknown;
1204 int istxt = 0;
1205 char *cp = 0, c, *cp2;
1206
1207 if (GetIndex(argv[3], &p, &i) != 0) {
1208 // sprintf(tmp, "malformed index: \"%s\"", argv[3]);
1209 return 0;
1210 }
1211 if (p) {
1212 ptyp = p->fType;
1213 if ((istxt = (ptyp == Html_Text))) {
1214 l = p->fCount;
1215 cp = ((TGHtmlTextElement *)p)->fZText;
1216 }
1217 }
1218 if (argv[2][0] == 'b') { // Break text token into two.
1219 if (!istxt) return 1;
1220 if (i == 0 || i == l) return 1;
1221 pElem = InsertToken(p->fPNext, "Text", cp + i, -1);
1222 cp[i] = 0;
1223 p->fCount = i;
1224 return 1;
1225 }
1226 c = argv[4][0];
1227 if (!c) return 1;
1228 if (c == '\b') {
1229 if ((!istxt) || (!l) || (!i)) {
1230 if (!p) return 1;
1231 if (p->fType == Html_BR)
1232 RemoveElements(p, p);
1233 return 1;
1234 }
1235 if (p && l == 1) {
1236 RemoveElements(p, p);
1237 return 1;
1238 }
1239 if (i == l)
1240 cp[p->fCount] = 0;
1241 else
1242 memcpy(cp+i-1, cp+i, l-i+1);
1243
1244 cp[--p->fCount] = 0;
1245 if (ins.i-- <= 0) ins.i = 0;
1246 ins.p = p;
1247 return 1;
1248 }
1249 if (c == '\n' || c == '\r') {
1250 }
1251 if (istxt) {
1252 char *cp;
1253 int t, j, alen = strlen(argv[4]);
1254 n = alen + l;
1255
1257
1258 if (text->fZText == (char*) ((&text->fZText)+1)) {
1259 cp = new char[n+1];
1260 strcpy(cp, text->fZText);
1261 } else {
1262 cp = new char[n+1];
1263 strcpy(cp, text->fZText);
1264 }
1265 cp2 = new char[alen+1];
1266 memcpy(cp2, argv[4], alen+1);
1268 alen = strlen(cp2);
1269 memmove(cp+alen+i, cp+i, l-i+1);
1270 for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
1271 delete[] cp2;
1272 delete[] text->fZText;
1273 text->fZText = cp;
1274 p->fCount = strlen(cp);
1275 ins.p = p;
1276 ins.i = i+alen;
1277 } else {
1278 p = InsertToken(p ? p->fPNext : 0, "Text", argv[4], -1);
1279 AddStyle(p);
1280 i = 0;
1281 ins.p = p;
1282 ins.i = 1;
1283 }
1284 if (p) {
1285 idx = p->base.id;
1286 AddStrOffset(p, argv[4], i);
1287 }
1288#endif
1289 return 1;
1290}
1291
1292////////////////////////////////////////////////////////////////////////////////
1293/// Returns token map matching zType name.
1294
1296{
1297 SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1298 int h; // The hash on zType
1299
1300 if (!gIsInit) {
1301 HtmlHashInit();
1302 gIsInit = 1;
1303 }
1304 h = HtmlHash(zType);
1305 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1306 if (strcasecmp(pMap->fZName, zType) == 0) break;
1307 }
1308
1309 return pMap;
1310}
1311
1312////////////////////////////////////////////////////////////////////////////////
1313/// Convert a markup name into a type integer
1314
1315int TGHtml::NameToType(char *zType)
1316{
1317 SHtmlTokenMap_t *pMap = NameToPmap(zType);
1318 return pMap ? pMap->fType : (int)Html_Unknown;
1319}
1320
1321////////////////////////////////////////////////////////////////////////////////
1322/// Convert a type into a symbolic name
1323
1324const char *TGHtml::TypeToName(int type)
1325{
1326 if (type >= Html_A && type <= Html_EndXMP) {
1327 SHtmlTokenMap_t *pMap = gApMap[type - Html_A];
1328 return pMap->fZName;
1329 } else {
1330 return "???";
1331 }
1332}
1333
1334////////////////////////////////////////////////////////////////////////////////
1335/// For debugging purposes, print information about a token
1336
1338{
1339//#ifdef DEBUG
1340 static char zBuf[200];
1341 int j;
1342 const char *zName;
1343
1344 if (p == 0) {
1345 snprintf(zBuf, 200, "NULL");
1346 return zBuf;
1347 }
1348 switch (p->fType) {
1349 case Html_Text:
1350 snprintf(zBuf, 200, "text: \"%.*s\"", p->fCount, ((TGHtmlTextElement *)p)->fZText);
1351 break;
1352
1353 case Html_Space:
1354 if (p->fFlags & HTML_NewLine) {
1355 snprintf(zBuf, 200, "space: \"\\n\"");
1356 } else {
1357 snprintf(zBuf, 200, "space: \" \"");
1358 }
1359 break;
1360
1361 case Html_Block: {
1362 TGHtmlBlock *block = (TGHtmlBlock *) p;
1363 if (block->fN > 0) {
1364 int n = block->fN;
1365 if (n > 150) n = 150;
1366 snprintf(zBuf, 200, "<Block z=\"%.*s\">", n, block->fZ);
1367 } else {
1368 snprintf(zBuf, 200, "<Block>");
1369 }
1370 break;
1371 }
1372
1373 default:
1374 if (p->fType >= HtmlMarkupMap[0].fType
1376 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1377 } else {
1378 zName = "Unknown";
1379 }
1380 snprintf(zBuf, 200, "markup (%d) <%s", p->fType, zName);
1381 for (j = 1 ; j < p->fCount; j += 2) {
1382 snprintf(&zBuf[strlen(zBuf)], 200-strlen(zBuf), " %s=\"%s\"",
1383 ((TGHtmlMarkupElement *)p)->fArgv[j-1],
1384 ((TGHtmlMarkupElement *)p)->fArgv[j]);
1385 }
1386 // coverity[secure_coding]
1387 strlcat(zBuf, ">", sizeof(zBuf));
1388 break;
1389 }
1390 return zBuf;
1391//#else
1392// return 0;
1393//#endif
1394}
1395
1396////////////////////////////////////////////////////////////////////////////////
1397/// Append all the arguments of the given markup to the given TGString.
1398///
1399/// Example: If the markup is <IMG SRC=image.gif ALT="hello!">
1400/// then the following text is appended to the TGString:
1401///
1402/// "src=image.gif alt=hello! "
1403///
1404/// Notice how all attribute names are converted to lower case.
1405/// This conversion happens in the parser.
1406
1408{
1409 int i;
1410
1411 for (i = 0; i + 1 < pElem->fCount; i += 2) {
1412 str->Append(pElem->fArgv[i]);
1413 str->Append("=");
1414 str->Append(pElem->fArgv[i+1]);
1415 str->Append(" ");
1416 }
1417}
1418
1419////////////////////////////////////////////////////////////////////////////////
1420/// Returns token name of html element p.
1421
1423{
1424 static char zBuf[200];
1425 //int j;
1426 const char *zName;
1427
1428 zBuf[0] = 0;
1429 if (p == 0) {
1430 // coverity[secure_coding]: zBuf is large enough
1431 strcpy(zBuf, "NULL");
1432 return zBuf;
1433 }
1434 switch (p->fType) {
1435 case Html_Text:
1436 case Html_Space:
1437 break;
1438
1439 case Html_Block:
1440 break;
1441
1442 default:
1443 if (p->fType >= HtmlMarkupMap[0].fType &&
1445 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1446 } else {
1447 zName = "Unknown";
1448 }
1449 strlcpy(zBuf, zName, sizeof(zBuf));
1450 break;
1451 }
1452
1453 return zBuf;
1454}
1455
1456////////////////////////////////////////////////////////////////////////////////
1457/// Returns token map at location n.
1458
1460{
1461 return HtmlMarkupMap+n;
1462}
1463
1464////////////////////////////////////////////////////////////////////////////////
1465/// Return all tokens between the two elements as a string list.
1466
1468{
1469 TGString *str;
1470 int i;
1471 const char *zName;
1472 char zLine[100];
1473
1474 str = new TGString("");
1475 while (p && p != pEnd) {
1476 switch (p->fType) {
1477 case Html_Block:
1478 break;
1479
1480 case Html_Text:
1481 str->Append("{ Text \"");
1482 str->Append(((TGHtmlTextElement *)p)->fZText);
1483 str->Append("\" } ");
1484 break;
1485
1486 case Html_Space:
1487 snprintf(zLine, 100, "Space %d %d ",
1488 p->fCount, (p->fFlags & HTML_NewLine) != 0);
1489 str->Append(zLine);
1490 break;
1491
1492 case Html_Unknown:
1493 str->Append("Unknown ");
1494 break;
1495
1496 default:
1497 str->Append("{ Markup ");
1498 if (p->fType >= HtmlMarkupMap[0].fType &&
1500 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1501 } else {
1502 zName = "Unknown";
1503 }
1504 str->Append(zName);
1505 str->Append(" ");
1506 for (i = 0; i < p->fCount; ++i) {
1507 str->Append(((TGHtmlMarkupElement *)p)->fArgv[i]);
1508 str->Append(" ");
1509 }
1510 str->Append("} ");
1511 break;
1512 }
1513 p = p->fPNext;
1514 }
1515
1516 return str;
1517}
1518
1519////////////////////////////////////////////////////////////////////////////////
1520/// Print a list of tokens
1521
1523{
1524 TGHtmlElement *p;
1525
1526 for (p = first; p != last; p = p->fPNext) {
1527 if (p->fType == Html_Block) {
1528 TGHtmlBlock *block = (TGHtmlBlock *) p;
1529 const char *z = block->fZ;
1530 int n = block->fN;
1531 if (n == 0 || z == 0) {
1532 n = 1;
1533 z = "";
1534 }
1535 printf("Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
1536 p->fFlags, p->fCount, block->fLeft, block->fRight,
1537 block->fTop, block->fBottom, n, z);
1538 } else {
1539 printf("Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
1540 p->fStyle.fFont, p->fStyle.fColor,
1541 p->fStyle.fAlign, p->fStyle.fFlags, DumpToken(p));
1542 }
1543 }
1544}
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
#define e(i)
Definition: RSha256.hxx:103
void ToLower(char *z)
Convert a string to all lower-case letters.
static struct SgEsc_t * gApEscHash[ESC_HASH_SIZE]
static int EscHash(const char *zName)
static int HtmlHash(const char *zName)
static char gAcMsChar[]
static int gIsInit
static int NextColumn(int iCol, char c)
Compute the new column index following the given character.
static SHtmlTokenMap_t * gApMap[HTML_MARKUP_HASH_SIZE]
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
SHtmlTokenMap_t HtmlMarkupMap[]
static struct SgEsc_t gEscSequences[]
Definition: TGHtmlParse.cxx:75
#define mxARG
static void HtmlHashInit(void)
#define ESC_HASH_SIZE
static void EscInit()
@ Html_COMMENT
Definition: TGHtmlTokens.h:74
@ Html_TEXTAREA
Definition: TGHtmlTokens.h:176
@ Html_XMP
Definition: TGHtmlTokens.h:193
@ Html_STYLE
Definition: TGHtmlTokens.h:167
@ Html_SCRIPT
Definition: TGHtmlTokens.h:158
@ Html_LI
Definition: TGHtmlTokens.h:122
@ Html_LISTING
Definition: TGHtmlTokens.h:125
@ Html_TABLE
Definition: TGHtmlTokens.h:172
@ Html_EndUL
Definition: TGHtmlTokens.h:189
@ Html_EndOL
Definition: TGHtmlTokens.h:144
@ Html_NOFRAMES
Definition: TGHtmlTokens.h:139
@ Html_PLAINTEXT
Definition: TGHtmlTokens.h:151
@ Html_Block
Definition: TGHtmlTokens.h:45
@ Html_Space
Definition: TGHtmlTokens.h:43
@ Html_Text
Definition: TGHtmlTokens.h:42
@ Html_A
Definition: TGHtmlTokens.h:46
@ Html_NOSCRIPT
Definition: TGHtmlTokens.h:141
@ Html_EndLI
Definition: TGHtmlTokens.h:123
@ Html_EndXMP
Definition: TGHtmlTokens.h:194
@ Html_BR
Definition: TGHtmlTokens.h:65
@ Html_Unknown
Definition: TGHtmlTokens.h:44
@ Html_EndCOMMENT
Definition: TGHtmlTokens.h:75
#define HTML_MARKUP_HASH_SIZE
Definition: TGHtmlTokens.h:200
#define HTML_MARKUP_COUNT
Definition: TGHtmlTokens.h:199
#define O_HtmlInput
Definition: TGHtml.h:858
#define O_HtmlHr
Definition: TGHtml.h:860
#define O_HtmlTable
Definition: TGHtml.h:853
#define UNTESTED
Definition: TGHtml.h:65
#define HTML_NewLine
Definition: TGHtml.h:276
#define O_HtmlImageMarkup
Definition: TGHtml.h:857
short Html_16_t
Definition: TGHtml.h:137
#define O_HtmlAnchor
Definition: TGHtml.h:861
#define O_HtmlLi
Definition: TGHtml.h:855
#define O_HtmlMapArea
Definition: TGHtml.h:863
#define O_HtmlRef
Definition: TGHtml.h:854
#define O_HtmlCell
Definition: TGHtml.h:852
#define O_HtmlScript
Definition: TGHtml.h:862
#define O_HtmlListStart
Definition: TGHtml.h:856
#define O_HtmlForm
Definition: TGHtml.h:859
int type
Definition: TGX11.cxx:121
#define snprintf
Definition: civetweb.c:1540
Html_u16_t fN
Definition: TGHtml.h:720
Html_u16_t fRight
Definition: TGHtml.h:719
char * fZ
Definition: TGHtml.h:717
Html_u16_t fLeft
Definition: TGHtml.h:719
int fTop
Definition: TGHtml.h:718
int fBottom
Definition: TGHtml.h:718
Html_u8_t fFlags
Definition: TGHtml.h:266
Html_u8_t fType
Definition: TGHtml.h:265
SHtmlStyle_t fStyle
Definition: TGHtml.h:264
TGHtmlElement * fPPrev
Definition: TGHtml.h:263
Html_16_t fCount
Definition: TGHtml.h:267
TGHtmlElement * fPNext
Definition: TGHtml.h:262
int fNStart
Definition: TGHtml.h:684
int fNScript
Definition: TGHtml.h:685
Html_16_t fW
Definition: TGHtml.h:309
char * fZText
Definition: TGHtml.h:301
int fAddEndTags
Definition: TGHtml.h:1254
TGString * ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
Return all tokens between the two elements as a string list.
int fICol
Definition: TGHtml.h:1181
void AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
Insert token pNew before token p.
int fIdind
Definition: TGHtml.h:1279
int TextInsertCmd(int argc, char **argv)
Insert text into text token, or break token into two text tokens.
void TokenizerAppend(const char *text)
Append text to the tokenizer engine.
TGHtmlElement * fPFirst
Definition: TGHtml.h:1136
void AddStyle(TGHtmlElement *p)
This routine adds information to the input texts that doesn't change when the display is resized or w...
virtual char * ProcessScript(TGHtmlScript *)
Definition: TGHtml.h:958
int fNToken
Definition: TGHtml.h:1138
TGHtmlElement * InsertToken(TGHtmlElement *pToken, char *zType, char *zArgs, int offs)
This routine takes a text representation of a token, converts it into an TGHtmlElement object and ins...
const char * TypeToName(int type)
Convert a type into a symbolic name.
int Tokenize()
Process as much of the input HTML as possible.
int NameToType(char *zType)
Convert a markup name into a type integer.
int fIPlaintext
Definition: TGHtml.h:1183
void AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
Append all the arguments of the given markup to the given TGString.
char * fZText
Definition: TGHtml.h:1176
int fNAlloc
Definition: TGHtml.h:1178
void AppendElement(TGHtmlElement *pElem)
Append the given TGHtmlElement to the tokenizers list of elements.
virtual int ProcessToken(TGHtmlElement *, const char *, int)
Definition: TGHtml.h:928
TGHtmlScript * fPScript
Definition: TGHtml.h:1187
int fHasFrames
Definition: TGHtml.h:1253
TGHtmlMarkupElement * MakeMarkupEntry(int objType, int type, int argc, int arglen[], char *argv[])
Make one markup entry.
TGHtmlElement * fPLast
Definition: TGHtml.h:1137
SHtmlTokenMap_t * GetMarkupMap(int n)
Returns token map at location n.
int fNComplete
Definition: TGHtml.h:1179
SHtmlTokenMap_t * NameToPmap(char *zType)
Returns token map matching zType name.
char * DumpToken(TGHtmlElement *p)
For debugging purposes, print information about a token.
char * GetTokenName(TGHtmlElement *p)
Returns token name of html element p.
int GetIndex(const char *zIndex, TGHtmlElement **ppToken, int *pIndex)
This routine decodes a complete index specification.
int fHasScript
Definition: TGHtml.h:1252
void PrintList(TGHtmlElement *first, TGHtmlElement *last)
Print a list of tokens.
void AddFormInfo(TGHtmlElement *p)
Add the DOM control information for form elements.
Definition: TGHtmlForm.cxx:565
int fNText
Definition: TGHtml.h:1177
TGString wraps a TString and adds some graphics routines like drawing, size of string on screen depen...
Definition: TGString.h:20
TString & Append(const char *cs)
Definition: TString.h:564
TText * text
const Int_t n
Definition: legend1.C:16
null_t< F > null()
Definition: first.py:1
const char * cnt
Definition: TXMLSetup.cxx:75
unsigned int fColor
Definition: TGHtml.h:146
unsigned int fAlign
Definition: TGHtml.h:148
unsigned int fFont
Definition: TGHtml.h:145
unsigned int fFlags
Definition: TGHtml.h:151
Html_16_t fObjType
Definition: TGHtml.h:843
Html_16_t fType
Definition: TGHtml.h:842
SHtmlTokenMap_t * fPCollide
Definition: TGHtml.h:844
const char * fZName
Definition: TGHtml.h:841
char fValue[8]
Definition: TGHtmlParse.cxx:68
SgEsc_t * fPNext
Definition: TGHtmlParse.cxx:69
const char * fZName
Definition: TGHtmlParse.cxx:67
auto * l
Definition: textangle.C:4
static uint64_t sum(uint64_t i)
Definition: Factory.cxx:2345