Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TGHtmlParse.cxx
Go to the documentation of this file.
1// $Id: TGHtmlParse.cxx,v 1.1 2007/05/04 17:07:01 brun Exp $
2// Author: Valeriy Onuchin 03/05/2007
3
4/*************************************************************************
5 * Copyright (C) 1995-2001, Rene Brun, Fons Rademakers and Reiner Rohlfs *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/**************************************************************************
13
14 HTML widget for xclass. Based on tkhtml 1.28
15 Copyright (C) 1997-2000 D. Richard Hipp <drh@acm.org>
16 Copyright (C) 2002-2003 Hector Peraza.
17
18 This library is free software; you can redistribute it and/or
19 modify it under the terms of the GNU Library General Public
20 License as published by the Free Software Foundation; either
21 version 2 of the License, or (at your option) any later version.
22
23 This library is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 Library General Public License for more details.
27
28 You should have received a copy of the GNU Library General Public
29 License along with this library; if not, write to the Free
30 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
31
32**************************************************************************/
33
34// A tokenizer that converts raw HTML into a linked list of HTML elements.
35
36#include <cstring>
37#include <cstdlib>
38#include <cstdio>
39#include <cctype>
40
41#include "TGHtml.h"
42#include "TGHtmlTokens.h"
43#include "strlcpy.h"
44#include "snprintf.h"
45
46//----------------------------------------------------------------------
47
49
50
51/****************** Begin Escape Sequence Translator *************/
52
53// The next section of code implements routines used to translate
54// the '&' escape sequences of SGML to individual characters.
55// Examples:
56//
57// &amp; &
58// &lt; <
59// &gt; >
60// &nbsp; nonbreakable space
61//
62
// One record of the SGML escape-sequence table.
//
// Besides living in the static table, each record is threaded onto the
// collision chain of its hash bucket through fPNext.

struct SgEsc_t {
   const char     *fZName;     // Sequence name without '&' and ';'. ex: "amp"
   char            fValue[8];  // NUL-terminated replacement text. ex: "&"
   struct SgEsc_t *fPNext;     // Next record whose name has the same hash
};
71
72// The following is a table of all escape sequences. Add new sequences
73// by adding entries to this table.
74
75static struct SgEsc_t gEscSequences[] = {
76 { "quot", "\"", 0 },
77 { "amp", "&", 0 },
78 { "lt", "<", 0 },
79 { "gt", ">", 0 },
80 { "nbsp", " ", 0 },
81 { "iexcl", "\241", 0 },
82 { "cent", "\242", 0 },
83 { "pound", "\243", 0 },
84 { "curren", "\244", 0 },
85 { "yen", "\245", 0 },
86 { "brvbar", "\246", 0 },
87 { "sect", "\247", 0 },
88 { "uml", "\250", 0 },
89 { "copy", "\251", 0 },
90 { "ordf", "\252", 0 },
91 { "laquo", "\253", 0 },
92 { "not", "\254", 0 },
93 { "shy", "\255", 0 },
94 { "reg", "\256", 0 },
95 { "macr", "\257", 0 },
96 { "deg", "\260", 0 },
97 { "plusmn", "\261", 0 },
98 { "sup2", "\262", 0 },
99 { "sup3", "\263", 0 },
100 { "acute", "\264", 0 },
101 { "micro", "\265", 0 },
102 { "para", "\266", 0 },
103 { "middot", "\267", 0 },
104 { "cedil", "\270", 0 },
105 { "sup1", "\271", 0 },
106 { "ordm", "\272", 0 },
107 { "raquo", "\273", 0 },
108 { "frac14", "\274", 0 },
109 { "frac12", "\275", 0 },
110 { "frac34", "\276", 0 },
111 { "iquest", "\277", 0 },
112 { "Agrave", "\300", 0 },
113 { "Aacute", "\301", 0 },
114 { "Acirc", "\302", 0 },
115 { "Atilde", "\303", 0 },
116 { "Auml", "\304", 0 },
117 { "Aring", "\305", 0 },
118 { "AElig", "\306", 0 },
119 { "Ccedil", "\307", 0 },
120 { "Egrave", "\310", 0 },
121 { "Eacute", "\311", 0 },
122 { "Ecirc", "\312", 0 },
123 { "Euml", "\313", 0 },
124 { "Igrave", "\314", 0 },
125 { "Iacute", "\315", 0 },
126 { "Icirc", "\316", 0 },
127 { "Iuml", "\317", 0 },
128 { "ETH", "\320", 0 },
129 { "Ntilde", "\321", 0 },
130 { "Ograve", "\322", 0 },
131 { "Oacute", "\323", 0 },
132 { "Ocirc", "\324", 0 },
133 { "Otilde", "\325", 0 },
134 { "Ouml", "\326", 0 },
135 { "times", "\327", 0 },
136 { "Oslash", "\330", 0 },
137 { "Ugrave", "\331", 0 },
138 { "Uacute", "\332", 0 },
139 { "Ucirc", "\333", 0 },
140 { "Uuml", "\334", 0 },
141 { "Yacute", "\335", 0 },
142 { "THORN", "\336", 0 },
143 { "szlig", "\337", 0 },
144 { "agrave", "\340", 0 },
145 { "aacute", "\341", 0 },
146 { "acirc", "\342", 0 },
147 { "atilde", "\343", 0 },
148 { "auml", "\344", 0 },
149 { "aring", "\345", 0 },
150 { "aelig", "\346", 0 },
151 { "ccedil", "\347", 0 },
152 { "egrave", "\350", 0 },
153 { "eacute", "\351", 0 },
154 { "ecirc", "\352", 0 },
155 { "euml", "\353", 0 },
156 { "igrave", "\354", 0 },
157 { "iacute", "\355", 0 },
158 { "icirc", "\356", 0 },
159 { "iuml", "\357", 0 },
160 { "eth", "\360", 0 },
161 { "ntilde", "\361", 0 },
162 { "ograve", "\362", 0 },
163 { "oacute", "\363", 0 },
164 { "ocirc", "\364", 0 },
165 { "otilde", "\365", 0 },
166 { "ouml", "\366", 0 },
167 { "divide", "\367", 0 },
168 { "oslash", "\370", 0 },
169 { "ugrave", "\371", 0 },
170 { "uacute", "\372", 0 },
171 { "ucirc", "\373", 0 },
172 { "uuml", "\374", 0 },
173 { "yacute", "\375", 0 },
174 { "thorn", "\376", 0 },
175 { "yuml", "\377", 0 },
176};
177
178
179// The size of the handler hash table. For best results this should
180// be a prime number which is about the same size as the number of
181// escape sequences known to the system.
182
183#define ESC_HASH_SIZE (sizeof(gEscSequences)/sizeof(gEscSequences[0])+7)
184
185
186// The hash table
187//
188// If the name of an escape sequence hashes to the value H, then
189// gApEscHash[H] will point to a linked list of Esc structures, one of
190// which will be the Esc structure for that escape sequence.
191
192static struct SgEsc_t *gApEscHash[ESC_HASH_SIZE];
193
194
195// Hash a escape sequence name. The value returned is an integer
196// between 0 and ESC_HASH_SIZE-1, inclusive.
197
198static int EscHash(const char *zName) {
199 int h = 0; // The hash value to be returned
200 char c; // The next character in the name being hashed
201
202 while ((c = *zName) != 0) {
203 h = h<<5 ^ h ^ c;
204 zName++;
205 }
206 if (h < 0) h = -h;
207
208 return h % ESC_HASH_SIZE;
209}
210
211#ifdef TEST
212// Compute the longest and average collision chain length for the
213// escape sequence hash table
214
215static void EscHashStats()
216{
217 int i;
218 int sum = 0;
219 int max = 0;
220 int cnt;
221 int notempty = 0;
222 struct SgEsc_t *p;
223
224 for (i = 0; i < sizeof(gEscSequences) / sizeof(gEscSequences[0]); i++) {
225 cnt = 0;
226 p = gApEscHash[i];
227 if (p) notempty++;
228 while (p) {
229 ++cnt;
230 p = p->fPNext;
231 }
232 sum += cnt;
233 if (cnt > max) max = cnt;
234 }
235 printf("Longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
236 max, (double)sum/(double)notempty, i, i-notempty,
237 100.0*(i-notempty)/(double)i);
238}
239#endif
240
241// Initialize the escape sequence hash table
242
243static void EscInit() {
244 int i; /* For looping thru the list of escape sequences */
245 int h; /* The hash on a sequence */
246
247 for (i = 0; i < int(sizeof(gEscSequences) / sizeof(gEscSequences[i])); i++) {
248/* #ifdef XCLASS_UTF_MAX */
249#if 0
250 int c = gEscSequences[i].value[0];
251 xclass::UniCharToUtf(c, gEscSequences[i].value);
252 }
253#endif
254 h = EscHash(gEscSequences[i].fZName);
255 gEscSequences[i].fPNext = gApEscHash[h];
257 }
258#ifdef TEST
259 EscHashStats();
260#endif
261}
262
263
// Replacement table for the non-standard Microsoft characters 0x80..0x9f.
// Each of them is mapped to a plain ASCII character so that it stays
// visible on Unix systems; all replacements are below 0x80 to avoid
// UTF-8 problems. The table is indexed by the low five bits of the
// character code, one row per eight codes.

static char gAcMsChar[] = {
   'C',  ' ',  ',',  'f',  '"',  '.',  '*',  '*',    // 0x80 - 0x87
   '^',  '%',  'S',  '<',  'O',  ' ',  'Z',  ' ',    // 0x88 - 0x8f
   ' ',  '\'', '\'', '"',  '"',  '*',  '-',  '-',    // 0x90 - 0x97
   '~',  '@',  's',  '>',  'o',  ' ',  'z',  'Y',    // 0x98 - 0x9f
};
303
304
305////////////////////////////////////////////////////////////////////////////////
306/// Translate escape sequences in the string "z". "z" is overwritten
307/// with the translated sequence.
308///
309/// Unrecognized escape sequences are unaltered.
310///
311/// Example:
312///
313/// input = "AT&amp;T &gt MCI"
314/// output = "AT&T > MCI"
315
317{
318 int from; // Read characters from this position in z[]
319 int to; // Write characters into this position in z[]
320 int h; // A hash on the escape sequence
321 struct SgEsc_t *p; // For looping down the escape sequence collision chain
322 static int isInit = 0; // True after initialization
323
324 from = to = 0;
325 if (!isInit) {
326 EscInit();
327 isInit = 1;
328 }
329 while (z[from]) {
330 if (z[from] == '&') {
331 if (z[from+1] == '#') {
332 int i = from + 2;
333 int v = 0;
334 while (isdigit(z[i])) {
335 v = v*10 + z[i] - '0';
336 i++;
337 }
338 if (z[i] == ';') { i++; }
339
340 // Translate the non-standard microsoft characters in the range of
341 // 0x80 to 0x9f into something we can see.
342
343 if (v >= 0x80 && v < 0xa0) {
344 v = gAcMsChar[v & 0x1f];
345 }
346
347 // Put the character in the output stream in place of the "&#000;".
348 // How we do this depends on whether or not we are using UTF-8.
349
350 z[to++] = v;
351 from = i;
352 } else {
353 int i = from+1;
354 int c;
355 while (z[i] && isalnum(z[i])) ++i;
356 c = z[i];
357 z[i] = 0;
358 h = EscHash(&z[from+1]);
359 p = gApEscHash[h];
360 while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
361 z[i] = c;
362 if (p) {
363 int j;
364 for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
365 from = i;
366 if (c == ';') from++;
367 } else {
368 z[to++] = z[from++];
369 }
370 }
371
372 // Look for the non-standard microsoft characters between 0x80 and 0x9f
373 // and translate them into printable ASCII codes. Separate algorithms
374 // are required to do this for plain ascii and for utf-8.
375
376 } else if (((unsigned char) z[from]) >= 0x80 &&
377 ((unsigned char) z[from]) < 0xa0) {
378 z[to++] = gAcMsChar[z[from++] & 0x1f];
379 } else {
380 z[to++] = z[from++];
381 }
382 }
383 z[to] = 0;
384}
385
386/******************* End Escape Sequence Translator ***************/
387
388/******************* Begin HTML tokenizer code *******************/
389
390// The following variable becomes TRUE when the markup hash table
391// (stored in HtmlMarkupMap[]) is initialized.
392
393static int gIsInit = 0;
394
395// The hash table for HTML markup names.
396//
397// If an HTML markup name hashes to H, then gApMap[H] will point to
398// a linked list of SHtmlTokenMap_t structures, one of which will describe
399// the particular markup (if it exists.)
400
402
403// Hash a markup name
404//
405// HTML markup is case insensitive, so this function will give the
406// same hash regardless of the case of the markup name.
407//
408// The value returned is an integer between 0 and HTML_MARKUP_HASH_SIZE-1,
409// inclusive.
410
411static int HtmlHash(const char *zName) {
412 int h = 0;
413 char c;
414
415 while ((c = *zName) != 0) {
416 if (isupper(c)) { // do we have to check for this??????
417 c = tolower(c);
418 }
419 h = h<<5 ^ h ^ c;
420 zName++;
421 }
422 if (h < 0) {
423 h = -h;
424 }
425
426 return h % HTML_MARKUP_HASH_SIZE;
427}
428
429
430#ifdef TEST
431// Compute the longest and average collision chain length for the
432// markup hash table
433
434static void HtmlHashStats() {
435 int i;
436 int sum = 0;
437 int max = 0;
438 int cnt;
439 int notempty = 0;
440 struct sgMap *p;
441
442 for (i = 0; i < HTML_MARKUP_COUNT; i++) {
443 cnt = 0;
444 p = gApMap[i];
445 if (p) notempty++;
446 while (p) {
447 cnt++;
448 p = p->fPCollide;
449 }
450 sum += cnt;
451 if (cnt > max) max = cnt;
452 }
453
454 printf("longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
455 max, (double)sum/(double)notempty, i, i-notempty,
456 100.0*(i-notempty)/(double)i);
457}
458#endif
459
460
461// Initialize the escape sequence hash table
462
463static void HtmlHashInit(void){
464 int i;
465 int h; // The hash on a markup name
466
467 for (i = 0; i < HTML_MARKUP_COUNT; i++) {
468 h = HtmlHash(HtmlMarkupMap[i].fZName);
470 gApMap[h] = &HtmlMarkupMap[i];
471 }
472#ifdef TEST
473 HtmlHashStats();
474#endif
475}
476
477////////////////////////////////////////////////////////////////////////////////
478/// Append the given TGHtmlElement to the tokenizer's list of elements
479
481{
482 pElem->fPNext = 0;
483 pElem->fPPrev = fPLast;
484 if (fPFirst == 0) {
485 fPFirst = pElem;
486 } else {
487 fPLast->fPNext = pElem;
488 }
489 fPLast = pElem;
490 fNToken++;
491}
492
493////////////////////////////////////////////////////////////////////////////////
494/// Insert token pNew before token p
495
497{
498 if (offs < 0) {
499 if (p) {
500 offs = p->fOffs;
501 } else {
502 offs = fNText;
503 }
504 }
505
506////if (p) { pNew->fStyle = p->fStyle; pNew->fFlags = p->fFlags; }
507
508// pNew->fCount = 0;
509 pNew->fOffs = offs;
510 pNew->fPNext = p;
511 if (p) {
512 pNew->fElId = p->fElId;
513 p->fElId = ++fIdind;
514 pNew->fPPrev = p->fPPrev;
515 if (p->fPPrev) p->fPPrev->fPNext = pNew;
516 if (fPFirst == p) fPFirst = pNew;
517 p->fPPrev = pNew;
518 } else {
519 pNew->fElId = ++fIdind;
520 AppendElement(pNew);
521 }
522 fNToken++;
523}
524
////////////////////////////////////////////////////////////////////////////////
/// Return the column index reached after character c is placed at column
/// iCol: a newline goes back to column 0, a tab advances to the next
/// multiple of 8, and any other character advances by one column.

static int NextColumn(int iCol, char c)
{
   if (c == '\n') return 0;
   if (c == '\t') return (iCol | 7) + 1;
   return iCol + 1;
}
537
////////////////////////////////////////////////////////////////////////////////
/// Convert a string to all lower-case letters, in place.
///
/// Each character is converted to unsigned char before it is passed to
/// isupper()/tolower(): those functions are undefined for negative
/// arguments, which is what a byte >= 0x80 becomes where plain char is
/// signed.
///
/// \param z  NUL-terminated string to convert; must not be null.

void ToLower(char *z)
{
   for (; *z; ++z) {
      const unsigned char uc = (unsigned char)*z;
      if (isupper(uc)) *z = (char)tolower(uc);
   }
}
548
549////////////////////////////////////////////////////////////////////////////////
550/// Process as much of the input HTML as possible. Construct new
551/// TGHtmlElement objects and append them to the list. Return
552/// the number of characters actually processed.
553///
554/// This routine may invoke a callback procedure which could delete
555/// the HTML widget.
556///
557/// This routine is not reentrant for the same HTML widget. To
558/// prevent reentrancy (during a callback), the p->fICol field is
559/// set to a negative number. This is a flag to future invocations
560/// not to reenter this routine. The p->fICol field is restored
561/// before exiting, of course.
562
564{
565 char *z; // The input HTML text
566 int c; // The next character of input
567 int n; // Number of characters processed so far
568 int inpCol; // Column of input
569 int i, j; // Loop counters
570 int h; // Result from HtmlHash()
571 TGHtmlElement *pElem;// A new HTML element
572 int selfClose; // True for content free elements. Ex: <br/>
573 int argc; // The number of arguments on a markup
574 SHtmlTokenMap_t *pMap; // For searching the markup name hash table
575# define mxARG 200 // Maximum number of parameters in a single markup
576 char *argv[mxARG]; // Pointers to each markup argument.
577 int arglen[mxARG]; // Length of each markup argument
578 //int rl, ol;
579#ifdef pIsInMeachnism
580 int pIsInScript = 0;
581 int pIsInNoScript = 0;
582 int pIsInNoFrames = 0;
583#endif
584 int sawdot = 0;
585 int inLi = 0;
586
587 static char null[1] = { "" };
588
589 inpCol = fICol;
590 n = fNComplete;
591 z = fZText;
592 if (inpCol < 0) return n; // Prevents recursion
593 fICol = -1;
594 pElem = 0;
595
596 while ((c = z[n]) != 0) {
597
598 sawdot--;
599 if (c == -64 && z[n+1] == -128) {
600 n += 2;
601 continue;
602 }
603
604 if (fPScript) {
605
606 // We are in the middle of <SCRIPT>...</SCRIPT>. Just look for
607 // the </SCRIPT> markup. (later:) Treat <STYLE>...</STYLE> the
608 // same way.
609
610 TGHtmlScript *pScr = fPScript;
611 const char *zEnd;
612 int nEnd;
613 //int curline, curch, curlast = n;
614 int sqcnt;
615 if (pScr->fType == Html_SCRIPT) {
616 zEnd = "</script>";
617 nEnd = 9;
618 } else if (pScr->fType == Html_NOSCRIPT) {
619 zEnd = "</noscript>";
620 nEnd = 11;
621 } else if (pScr->fType == Html_NOFRAMES) {
622 zEnd = "</noframes>";
623 nEnd = 11;
624 } else {
625 zEnd = "</style>";
626 nEnd = 8;
627 }
628 if (pScr->fNStart < 0) {
629 pScr->fNStart = n;
630 pScr->fNScript = 0;
631 }
632 sqcnt = 0;
633 for (i = n /*pScr->fNStart + pScr->fNScript*/; z[i]; i++) {
634 if (z[i] == '\'' || z[i] == '"') {
635 sqcnt++; // Skip if odd # quotes
636 } else if (z[i] == '\n') {
637 sqcnt = 0;
638 }
639 if (z[i] == '<' && z[i+1] == '/' &&
640 strncasecmp(&z[i], zEnd, nEnd) == 0) {
641 if (zEnd[3] == 'c' && ((sqcnt % 2) == 1)) continue;
642 pScr->fNScript = i - n;
643 fPScript = 0;
644 n = i + nEnd;
645 break;
646 }
647 }
648 if (z[i] == 0) goto incomplete;
649 if (fPScript) {
650 pScr->fNScript = i - n;
651 n = i;
652 }
653 else {
654#ifdef pIsInMeachnism
655 // If there is a script, execute it now and insert any output
656 // to the html stream for parsing as html. (ie. client side scripting)
657
658 if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
659
660 //for (curch = 0, curline = 1; curch <= curlast; curch++)
661 // if (z[curch] == '\n') curline++;
662
663 // arglist in pElem and text pointers in pScr?
664 // Inline scripts can contain unmatched brackets :-)
665 //char varind[50];
666 //sprintf(varind, "HtmlScrVar%d", p->varind++);
667 //char savech = fZText[pScr->fNStart + pScr->fNScript];
668 //fZText[pScr->fNStart + pScr->fNScript] = 0;
669 //char *scriptBody = StrDup(fZText[pScr->fNStart]);
670 //fZText[pScr->fNStart + pScr->fNScript] = savech;
671 AdvanceLayout(p);
672 inParse++;
673 char *result = ProcessScript((TGHtmlScript *) pElem); // pElem or pScr??
674 inParse--;
675 if (result) {
676 ol = fNAlloc;
677 rl = strlen(result);
678 fNAlloc += rl;
679 z = fZText = HtmlRealloc(z, ol+rl);
680 memmove(z + n + rl, z+n, ol - n);
681 memmove(z + n, result, rl);
682 }
683 }
684 pIsInScript = 0;
685 pIsInNoScript = 0;
686 pIsInNoFrames = 0;
687#endif
688 }
689 //continue;
690
691 }
692 else if (isspace((unsigned char)c)) {
693
694 // White space
695 for (i = 0;
696 (c = z[n+i]) != 0 && isspace((unsigned char)c) && c != '\n' && c != '\r';
697 i++) { }
698 if (c == '\r' && z[n+i+1] == '\n') ++i;
699#if 0 // this is certainly NOT OK, since it alters pre-formatted text
700 if (sawdot == 1) {
701 pElem = new TGHtmlTextElement(2);
702 strcpy(((TGHtmlTextElement *)pElem)->fZText, " ");
703 pElem->fElId = ++fIdind;
704 pElem->fOffs = n;
705 pElem->fCount = 1;
706 AppendElement(pElem);
707 }
708#endif
709 pElem = new TGHtmlSpaceElement;
710 if (pElem == 0) goto incomplete;
711 ((TGHtmlSpaceElement *)pElem)->fW = 0;
712 pElem->fOffs = n;
713 pElem->fElId = ++fIdind;
714 if (c == '\n' || c == '\r') {
715 pElem->fFlags = HTML_NewLine;
716 pElem->fCount = 1;
717 i++;
718 inpCol = 0;
719 } else {
720 int iColStart = inpCol;
721 pElem->fFlags = 0;
722 for (j = 0; j < i; j++) {
723 inpCol = NextColumn(inpCol, z[n+j]);
724 }
725 pElem->fCount = inpCol - iColStart;
726 }
727 AppendElement(pElem);
728 n += i;
729
730 }
731 else if (c != '<' || fIPlaintext != 0 ||
732 (!isalpha(z[n+1]) && z[n+1] != '/' && z[n+1] != '!' && z[n+1] != '?')) {
733
734 // Ordinary text
735 for (i = 1; (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '<'; i++) {}
736 if (z[n+i-1] == '.' || z[n+i-1] == '!' || z[n+i-1] == '?') sawdot = 2;
737 if (c == 0) goto incomplete;
738 if (fIPlaintext != 0 && z[n] == '<') {
739 switch (fIPlaintext) {
740 case Html_LISTING:
741 if (i >= 10 && strncasecmp(&z[n], "</listing>", 10) == 0) {
742 fIPlaintext = 0;
743 goto doMarkup;
744 }
745 break;
746
747 case Html_XMP:
748 if (i >= 6 && strncasecmp(&z[n], "</xmp>", 6) == 0) {
749 fIPlaintext = 0;
750 goto doMarkup;
751 }
752 break;
753
754 case Html_TEXTAREA:
755 if (i >= 11 && strncasecmp(&z[n], "</textarea>", 11) == 0) {
756 fIPlaintext = 0;
757 goto doMarkup;
758 }
759 break;
760
761 default:
762 break;
763 }
764 }
765 pElem = new TGHtmlTextElement(i);
766 if (pElem == 0) goto incomplete;
767 TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
768 tpElem->fElId = ++fIdind;
769 tpElem->fOffs = n;
770 strncpy(tpElem->fZText, &z[n], i);
771 tpElem->fZText[i] = 0;
772 AppendElement(pElem);
773 if (fIPlaintext == 0 || fIPlaintext == Html_TEXTAREA) {
775 }
776 pElem->fCount = (Html_16_t) strlen(tpElem->fZText);
777 n += i;
778 inpCol += i;
779
780 } else if (strncmp(&z[n], "<!--", 4) == 0) {
781
782 // An HTML comment. Just skip it.
783 for (i = 4; z[n+i]; i++) {
784 if (z[n+i] == '-' && strncmp(&z[n+i], "-->", 3) == 0) break;
785 }
786 if (z[n+i] == 0) goto incomplete;
787
788 pElem = new TGHtmlTextElement(i);
789 if (pElem == 0) goto incomplete;
790 TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
791 tpElem->fType = Html_COMMENT;
792 tpElem->fElId = ++fIdind;
793 tpElem->fOffs = n;
794 strncpy(tpElem->fZText, &z[n+4], i-4);
795 tpElem->fZText[i-4] = 0;
796 tpElem->fCount = 0;
797 AppendElement(pElem);
798
799 pElem = new TGHtmlElement(Html_EndCOMMENT);
800 AppToken(pElem, 0, n+4);
801
802 for (j = 0; j < i+3; j++) {
803 inpCol = NextColumn(inpCol, z[n+j]);
804 }
805 n += i + 3;
806
807 }
808 else {
809
810 // Markup.
811 //
812 // First get the name of the markup
813doMarkup:
814 argc = 1;
815 argv[0] = &z[n+1];
816 for (i = 1;
817 (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '>' && (i < 2 || c != '/');
818 i++) {}
819 arglen[0] = i - 1;
820 if (c == 0) goto incomplete;
821
822 // Now parse up the arguments
823
824 while (isspace((unsigned char)z[n+i])) ++i;
825 while ((c = z[n+i]) != 0 && c != '>' && (c != '/' || z[n+i+1] != '>')) {
826 if (argc > mxARG - 3) argc = mxARG - 3;
827 argv[argc] = &z[n+i];
828 j = 0;
829 while ((c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>' &&
830 c != '=' && (c != '/' || z[n+i+j+1] != '>')) ++j;
831 arglen[argc] = j;
832 if (c == 0) goto incomplete;
833 i += j;
834 while (isspace((unsigned char)c)) {
835 i++;
836 c = z[n+i];
837 }
838 if (c == 0) goto incomplete;
839 argc++;
840 if (c != '=') {
841 argv[argc] = null;
842 arglen[argc] = 0;
843 argc++;
844 continue;
845 }
846 i++;
847 c = z[n+i];
848 while (isspace((unsigned char)c)) {
849 i++;
850 c = z[n+i];
851 }
852 if (c == 0) goto incomplete;
853 if (c == '\'' || c == '"') {
854 int cQuote = c;
855 i++;
856 argv[argc] = &z[n+i];
857 for (j = 0; (c = z[n+i+j]) != 0 && c != cQuote; j++) {}
858 if (c == 0) goto incomplete;
859 arglen[argc] = j;
860 i += j+1;
861 } else {
862 argv[argc] = &z[n+i];
863 for (j = 0; (c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>'; j++) {}
864 if (c == 0) goto incomplete;
865 arglen[argc] = j;
866 i += j;
867 }
868 argc++;
869 while (isspace(z[n+i])) ++i;
870 }
871 if (c == '/') {
872 i++;
873 c = z[n+i];
874 selfClose = 1;
875 } else {
876 selfClose = 0;
877 }
878 if (c == 0) goto incomplete;
879 for (j = 0; j < i+1; j++) {
880 inpCol = NextColumn(inpCol, z[n+j]);
881 }
882 n += i + 1;
883
884 // Lookup the markup name in the hash table
885
886 if (!gIsInit) {
887 HtmlHashInit();
888 gIsInit = 1;
889 }
890 c = argv[0][arglen[0]];
891 argv[0][arglen[0]] = 0;
892 h = HtmlHash(argv[0]);
893 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
894 if (strcasecmp(pMap->fZName, argv[0]) == 0) break;
895 }
896 argv[0][arglen[0]] = c;
897 if (pMap == 0) continue; // Ignore unknown markup
898
899makeMarkupEntry:
900 // Construct a TGHtmlMarkupElement object for this markup.
901
902 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc, arglen, argv);
903 if (pElem == 0) goto incomplete;
904
905 pElem->fElId = ++fIdind;
906 pElem->fOffs = n;
907
908 AddFormInfo(pElem);
909
910 // The new markup has now been constructed in pElem. But before
911 // appending it to the list, check to see if there is a special
912 // handler for this markup type.
913
914 if (ProcessToken(pElem, pMap->fZName, pMap->fType)) {
915 // delete pElem;
916
917 // Tricky, tricky. The user function might have caused the p->fZText
918 // pointer to change, so renew our copy of that pointer.
919
920 z = fZText;
921 if (z == 0) {
922 n = 0;
923 inpCol = 0;
924 goto incomplete;
925 }
926 continue;
927 }
928
929 // No special handler for this markup. Just append it to the
930 // list of all tokens.
931
932 AppendElement(pElem);
933 switch (pMap->fType) {
934 case Html_TABLE:
935 break;
936
937 case Html_PLAINTEXT:
938 case Html_LISTING:
939 case Html_XMP:
940 case Html_TEXTAREA:
941 fIPlaintext = pMap->fType;
942 break;
943
944 case Html_NOFRAMES:
945 if (!fHasFrames) break;
946#ifdef pIsInMeachnism
947 pIsInNoFrames = 1;
948#endif
949 case Html_NOSCRIPT:
950 break;
951 // coverity[unreachable]
952 if (!fHasScript) break;
953#ifdef pIsInMeachnism
954 pIsInNoScript = 1;
955#endif
956 case Html_SCRIPT:
957#ifdef pIsInMeachnism
958 pIsInScript = 1;
959#endif
960 // fallthrough
961 case Html_STYLE:
962 fPScript = (TGHtmlScript *) pElem;
963 break;
964
965 case Html_LI:
966 if (!fAddEndTags) break;
967 if (inLi) {
969 AppToken(e, pElem, n);
970 } else {
971 inLi = 1;
972 }
973 break;
974
975 case Html_EndLI:
976 inLi=0;
977 break;
978
979 case Html_EndOL:
980 case Html_EndUL:
981 if (!fAddEndTags) break;
982 if (inLi) {
984 AppToken(e, pElem, n);
985 } else {
986 inLi = 0;
987 }
988 break;
989
990 default:
991 break;
992 }
993
994 // If this is self-closing markup (ex: <br/> or <img/>) then
995 // synthesize a closing token.
996
997 if (selfClose && argv[0][0] != '/' &&
998 strcmp(&pMap[1].fZName[1], pMap->fZName) == 0) {
999 selfClose = 0;
1000 pMap++;
1001 argc = 1;
1002 goto makeMarkupEntry;
1003 }
1004 }
1005 }
1006
1007incomplete:
1008 fICol = inpCol;
1009 ////fPScript = 0;
1010
1011 return n;
1012}
1013
1014/************************** End HTML Tokenizer Code ***************************/
1015
1016////////////////////////////////////////////////////////////////////////////////
1017/// Make one markup entry.
1018
1020 int arglen[], char *argv[])
1021{
1023
1024 switch (objType) {
1025 case O_HtmlCell:
1026 e = new TGHtmlCell(type, argc, arglen, argv);
1027 break;
1028
1029 case O_HtmlTable:
1030 e = new TGHtmlTable(type, argc, arglen, argv);
1031 break;
1032
1033 case O_HtmlRef:
1034 e = new TGHtmlRef(type, argc, arglen, argv);
1035 break;
1036
1037 case O_HtmlLi:
1038 e = new TGHtmlLi(type, argc, arglen, argv);
1039 break;
1040
1041 case O_HtmlListStart:
1042 e = new TGHtmlListStart(type, argc, arglen, argv);
1043 break;
1044
1045 case O_HtmlImageMarkup:
1046 e = new TGHtmlImageMarkup(type, argc, arglen, argv);
1047 break;
1048
1049 case O_HtmlInput:
1050 e = new TGHtmlInput(type, argc, arglen, argv);
1051 break;
1052
1053 case O_HtmlForm:
1054 e = new TGHtmlForm(type, argc, arglen, argv);
1055 break;
1056
1057 case O_HtmlHr:
1058 e = new TGHtmlHr(type, argc, arglen, argv);
1059 break;
1060
1061 case O_HtmlAnchor:
1062 e = new TGHtmlAnchor(type, argc, arglen, argv);
1063 break;
1064
1065 case O_HtmlScript:
1066 e = new TGHtmlScript(type, argc, arglen, argv);
1067 break;
1068
1069 case O_HtmlMapArea:
1070 e = new TGHtmlMapArea(type, argc, arglen, argv);
1071 break;
1072
1073 default:
1074 e = new TGHtmlMarkupElement(type, argc, arglen, argv);
1075 break;
1076 }
1077
1078 return e;
1079}
1080
1081////////////////////////////////////////////////////////////////////////////////
1082/// Append text to the tokenizer engine.
1083
1085{
1086 int len = strlen(text);
1087
1088 if (fNText == 0) {
1089 fNAlloc = len + 100;
1090 fZText = new char [fNAlloc];
1091 } else if (fNText + len >= fNAlloc) {
1092 fNAlloc += len + 100;
1093 char *tmp = new char[fNAlloc];
1094 // coverity[secure_coding]
1095 strcpy(tmp, fZText);
1096 delete[] fZText;
1097 fZText = tmp;
1098 }
1099
1100 if (fZText == 0) {
1101 fNText = 0;
1102 UNTESTED;
1103 return;
1104 }
1105
1106 // coverity[secure_coding]
1107 strcpy(&fZText[fNText], text);
1108 fNText += len;
1109 fNComplete = Tokenize();
1110}
1111
1112////////////////////////////////////////////////////////////////////////////////
1113/// This routine takes a text representation of a token, converts it into a
1114/// TGHtmlElement object and inserts it immediately prior to pToken. If pToken
1115/// is 0, then the newly created TGHtmlElement is appended.
1116///
1117/// This routine does nothing to resize, restyle, relayout or redisplay
1118/// the HTML. That is the calling routine's responsibility.
1119///
1120/// Return the new TGHtmlElement object if successful. Return zero if
1121/// zType is not a known markup name.
1122///
1123/// pToken - Insert before this. Append if pToken == 0
1124/// zType - Type of markup. Ex: "/a" or "table"
1125/// zArgs - List of arguments
1126/// offs - Calculate offset, and insert changed text into fZText!
1127
1129 char *zType, char *zArgs, int offs)
1130{
1131 SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1132 int h; // The hash on zType
1133 TGHtmlElement *pElem; // The new element
1134 //int nByte; // How many bytes to allocate
1135 //int i; // Loop counter
1136
1137 if (!gIsInit) {
1138 HtmlHashInit();
1139 gIsInit = 1;
1140 }
1141
1142 if (strcmp(zType, "Text") == 0) {
1143 pElem = new TGHtmlTextElement(zArgs ? strlen(zArgs) : 0);
1144 if (pElem == 0) return 0;
1145 if (zArgs) {
1146 // coverity[secure_coding]
1147 strcpy (((TGHtmlTextElement *)pElem)->fZText, zArgs);
1148 pElem->fCount = (Html_16_t) strlen(zArgs);
1149 }
1150 } else if (!strcmp(zType, "Space")) {
1151 pElem = new TGHtmlSpaceElement();
1152 if (pElem == 0) return 0;
1153 } else {
1154 h = HtmlHash(zType);
1155 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1156 if (strcasecmp(pMap->fZName, zType) == 0) break;
1157 }
1158 if (pMap == 0) return 0;
1159 if (zArgs == 0 || *zArgs == 0) {
1160 // Special case of no arguments. This is a lot easier...
1161 // well... now its the same thing!
1162 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, 1, 0, 0);
1163 if (pElem == 0) return 0;
1164 } else {
1165 // The general case. There are arguments that need to be parsed
1166 // up. This is slower, but we gotta do it.
1167 //int argc;
1168 //char **argv;
1169 //char *zBuf;
1170
1171#if 0
1172 if (!SplitList(zArgs, &argc, &argv)) return 0;
1173
1174 // shall we insert a dummy argv[0]?
1175
1176 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc/*+1??*/, 0, argv);
1177 if (pElem == 0) return 1;
1178
1179 while (--argc >= 0) if (argv[argc]) delete[] argv[argc];
1180 delete[] argv;
1181#else
1182 return 0;
1183#endif
1184 }
1185 }
1186
1187 pElem->fElId = ++fIdind;
1188
1189 AppToken(pElem, pToken, offs);
1190
1191 return pElem;
1192}
1193
1194////////////////////////////////////////////////////////////////////////////////
1195/// Insert text into text token, or break token into two text tokens.
1196/// Also, handle backspace char by deleting text.
1197/// Should also handle newline char by splitting text.
1198
1199int TGHtml::TextInsertCmd(int /*argc*/, char ** /*argv*/)
1200{
   // Currently a stub: the whole implementation below is compiled out by
   // "#if 0", both parameters are unnamed, and the function always returns 1.
   // The disabled code is kept as a reference for the intended behaviour
   // (break a text token in two, delete on backspace, insert text).
   //
   // NOTE(review): the disabled code cannot be re-enabled as-is. It uses
   // `text` and `ins`, which are not declared in the visible code, and reads
   // `p->base.id`, while the rest of this file uses `fElId` for element ids.
1201#if 0
1202 TGHtmlElement *p, *pElem;
1203 int i, l, n = 0;
1204 int idx = 0;
1205 int ptyp = Html_Unknown;
1206 int istxt = 0;
1207 char *cp = 0, c, *cp2;
1208
1209 if (GetIndex(argv[3], &p, &i) != 0) {
1210 // sprintf(tmp, "malformed index: \"%s\"", argv[3]);
1211 return 0;
1212 }
   // `l` and `cp` are only assigned when p is a text element; every later
   // use of them is guarded by `istxt`.
1213 if (p) {
1214 ptyp = p->fType;
1215 if ((istxt = (ptyp == Html_Text))) {
1216 l = p->fCount;
1217 cp = ((TGHtmlTextElement *)p)->fZText;
1218 }
1219 }
1220 if (argv[2][0] == 'b') { // Break text token into two.
1221 if (!istxt) return 1;
1222 if (i == 0 || i == l) return 1;
1223 pElem = InsertToken(p->fPNext, "Text", cp + i, -1);
1224 cp[i] = 0;
1225 p->fCount = i;
1226 return 1;
1227 }
1228 c = argv[4][0];
1229 if (!c) return 1;
   // Backspace: remove a <BR> or a one-character text token entirely,
   // otherwise delete the character in front of index i.
1230 if (c == '\b') {
1231 if ((!istxt) || (!l) || (!i)) {
1232 if (!p) return 1;
1233 if (p->fType == Html_BR)
1234 RemoveElements(p, p);
1235 return 1;
1236 }
1237 if (p && l == 1) {
1238 RemoveElements(p, p);
1239 return 1;
1240 }
1241 if (i == l)
1242 cp[p->fCount] = 0;
1243 else
   // NOTE(review): source and destination overlap here (shift left by one);
   // memcpy on overlapping ranges is undefined, this would need memmove.
1244 memcpy(cp+i-1, cp+i, l-i+1);
1245
1246 cp[--p->fCount] = 0;
1247 if (ins.i-- <= 0) ins.i = 0;
1248 ins.p = p;
1249 return 1;
1250 }
   // Newline / carriage return handling was never written: the branch is empty.
1251 if (c == '\n' || c == '\r') {
1252 }
1253 if (istxt) {
   // This inner `cp` shadows the outer `cp` declared at the top of the function.
1254 char *cp;
1255 int t, j, alen = strlen(argv[4]);
1256 n = alen + l;
1257
1259
   // NOTE(review): both branches of this test are identical.
1260 if (text->fZText == (char*) ((&text->fZText)+1)) {
1261 cp = new char[n+1];
1262 strcpy(cp, text->fZText);
1263 } else {
1264 cp = new char[n+1];
1265 strcpy(cp, text->fZText);
1266 }
1267 cp2 = new char[alen+1];
1268 memcpy(cp2, argv[4], alen+1);
1270 alen = strlen(cp2);
   // Open a gap of alen characters at index i, then copy the new text into it.
1271 memmove(cp+alen+i, cp+i, l-i+1);
1272 for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
1273 delete[] cp2;
   // NOTE(review): the old buffer is delete[]'d unconditionally, although the
   // test above suggests fZText may point at storage inside the element itself
   // - confirm before re-enabling.
1274 delete[] text->fZText;
1275 text->fZText = cp;
1276 p->fCount = strlen(cp);
1277 ins.p = p;
1278 ins.i = i+alen;
1279 } else {
1280 p = InsertToken(p ? p->fPNext : 0, "Text", argv[4], -1);
1281 AddStyle(p);
1282 i = 0;
1283 ins.p = p;
1284 ins.i = 1;
1285 }
1286 if (p) {
1287 idx = p->base.id;
1288 AddStrOffset(p, argv[4], i);
1289 }
1290#endif
1291 return 1;
1292}
1293
1294////////////////////////////////////////////////////////////////////////////////
1295/// Returns token map matching zType name.
1296
1298{
1299 SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1300 int h; // The hash on zType
1301
1302 if (!gIsInit) {
1303 HtmlHashInit();
1304 gIsInit = 1;
1305 }
1306 h = HtmlHash(zType);
1307 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1308 if (strcasecmp(pMap->fZName, zType) == 0) break;
1309 }
1310
1311 return pMap;
1312}
1313
1314////////////////////////////////////////////////////////////////////////////////
1315/// Convert a markup name into a type integer
1316
1317int TGHtml::NameToType(char *zType)
1318{
1319 SHtmlTokenMap_t *pMap = NameToPmap(zType);
1320 return pMap ? pMap->fType : (int)Html_Unknown;
1321}
1322
1323////////////////////////////////////////////////////////////////////////////////
1324/// Convert a type into a symbolic name
1325
1326const char *TGHtml::TypeToName(int type)
1327{
1328 if (type >= Html_A && type <= Html_EndXMP) {
1329 SHtmlTokenMap_t *pMap = gApMap[type - Html_A];
1330 return pMap->fZName;
1331 } else {
1332 return "???";
1333 }
1334}
1335
1336////////////////////////////////////////////////////////////////////////////////
1337/// For debugging purposes, print information about a token
1338
1340{
1341//#ifdef DEBUG
1342 static char zBuf[200];
1343 int j;
1344 const char *zName;
1345
1346 if (p == 0) {
1347 snprintf(zBuf, 200, "NULL");
1348 return zBuf;
1349 }
1350 switch (p->fType) {
1351 case Html_Text:
1352 snprintf(zBuf, 200, "text: \"%.*s\"", p->fCount, ((TGHtmlTextElement *)p)->fZText);
1353 break;
1354
1355 case Html_Space:
1356 if (p->fFlags & HTML_NewLine) {
1357 snprintf(zBuf, 200, "space: \"\\n\"");
1358 } else {
1359 snprintf(zBuf, 200, "space: \" \"");
1360 }
1361 break;
1362
1363 case Html_Block: {
1364 TGHtmlBlock *block = (TGHtmlBlock *) p;
1365 if (block->fN > 0) {
1366 int n = block->fN;
1367 if (n > 150) n = 150;
1368 snprintf(zBuf, 200, "<Block z=\"%.*s\">", n, block->fZ);
1369 } else {
1370 snprintf(zBuf, 200, "<Block>");
1371 }
1372 break;
1373 }
1374
1375 default:
1376 if (p->fType >= HtmlMarkupMap[0].fType
1378 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1379 } else {
1380 zName = "Unknown";
1381 }
1382 snprintf(zBuf, 200, "markup (%d) <%s", p->fType, zName);
1383 for (j = 1 ; j < p->fCount; j += 2) {
1384 snprintf(&zBuf[strlen(zBuf)], 200-strlen(zBuf), " %s=\"%s\"",
1385 ((TGHtmlMarkupElement *)p)->fArgv[j-1],
1386 ((TGHtmlMarkupElement *)p)->fArgv[j]);
1387 }
1388 // coverity[secure_coding]
1389 strcat(zBuf, ">");
1390 break;
1391 }
1392 return zBuf;
1393//#else
1394// return 0;
1395//#endif
1396}
1397
1398////////////////////////////////////////////////////////////////////////////////
1399/// Append all the arguments of the given markup to the given TGString.
1400///
1401/// Example: If the markup is <IMG SRC=image.gif ALT="hello!">
1402/// then the following text is appended to the TGString:
1403///
1404/// "src=image.gif alt=hello! "
1405///
1406/// Notice how all attribute names are converted to lower case.
1407/// This conversion happens in the parser.
1408
1410{
1411 int i;
1412
1413 for (i = 0; i + 1 < pElem->fCount; i += 2) {
1414 str->Append(pElem->fArgv[i]);
1415 str->Append("=");
1416 str->Append(pElem->fArgv[i+1]);
1417 str->Append(" ");
1418 }
1419}
1420
1421////////////////////////////////////////////////////////////////////////////////
1422/// Returns token name of html element p.
1423
1425{
1426 static char zBuf[200];
1427 //int j;
1428 const char *zName;
1429
1430 zBuf[0] = 0;
1431 if (p == 0) {
1432 // coverity[secure_coding]: zBuf is large enough
1433 strcpy(zBuf, "NULL");
1434 return zBuf;
1435 }
1436 switch (p->fType) {
1437 case Html_Text:
1438 case Html_Space:
1439 break;
1440
1441 case Html_Block:
1442 break;
1443
1444 default:
1445 if (p->fType >= HtmlMarkupMap[0].fType &&
1447 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1448 } else {
1449 zName = "Unknown";
1450 }
1451 strlcpy(zBuf, zName, sizeof(zBuf));
1452 break;
1453 }
1454
1455 return zBuf;
1456}
1457
1458////////////////////////////////////////////////////////////////////////////////
1459/// Returns token map at location n.
1460
1462{
1463 return HtmlMarkupMap+n;
1464}
1465
1466////////////////////////////////////////////////////////////////////////////////
1467/// Return all tokens between the two elements as a string list.
1468
1470{
1471 TGString *str;
1472 int i;
1473 const char *zName;
1474 char zLine[100];
1475
1476 str = new TGString("");
1477 while (p && p != pEnd) {
1478 switch (p->fType) {
1479 case Html_Block:
1480 break;
1481
1482 case Html_Text:
1483 str->Append("{ Text \"");
1484 str->Append(((TGHtmlTextElement *)p)->fZText);
1485 str->Append("\" } ");
1486 break;
1487
1488 case Html_Space:
1489 snprintf(zLine, 100, "Space %d %d ",
1490 p->fCount, (p->fFlags & HTML_NewLine) != 0);
1491 str->Append(zLine);
1492 break;
1493
1494 case Html_Unknown:
1495 str->Append("Unknown ");
1496 break;
1497
1498 default:
1499 str->Append("{ Markup ");
1500 if (p->fType >= HtmlMarkupMap[0].fType &&
1502 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1503 } else {
1504 zName = "Unknown";
1505 }
1506 str->Append(zName);
1507 str->Append(" ");
1508 for (i = 0; i < p->fCount; ++i) {
1509 str->Append(((TGHtmlMarkupElement *)p)->fArgv[i]);
1510 str->Append(" ");
1511 }
1512 str->Append("} ");
1513 break;
1514 }
1515 p = p->fPNext;
1516 }
1517
1518 return str;
1519}
1520
1521////////////////////////////////////////////////////////////////////////////////
1522/// Print a list of tokens
1523
1525{
1526 TGHtmlElement *p;
1527
1528 for (p = first; p != last; p = p->fPNext) {
1529 if (p->fType == Html_Block) {
1530 TGHtmlBlock *block = (TGHtmlBlock *) p;
1531 const char *z = block->fZ;
1532 int n = block->fN;
1533 if (n == 0 || z == 0) {
1534 n = 1;
1535 z = "";
1536 }
1537 printf("Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
1538 p->fFlags, p->fCount, block->fLeft, block->fRight,
1539 block->fTop, block->fBottom, n, z);
1540 } else {
1541 printf("Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
1542 p->fStyle.fFont, p->fStyle.fColor,
1543 p->fStyle.fAlign, p->fStyle.fFlags, DumpToken(p));
1544 }
1545 }
1546}
#define c(i)
Definition RSha256.hxx:101
#define h(i)
Definition RSha256.hxx:106
#define e(i)
Definition RSha256.hxx:103
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
void ToLower(char *z)
Convert a string to all lower-case letters.
static struct SgEsc_t * gApEscHash[ESC_HASH_SIZE]
static int EscHash(const char *zName)
static int HtmlHash(const char *zName)
static char gAcMsChar[]
static int gIsInit
static int NextColumn(int iCol, char c)
Compute the new column index following the given character.
static SHtmlTokenMap_t * gApMap[HTML_MARKUP_HASH_SIZE]
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
SHtmlTokenMap_t HtmlMarkupMap[]
static struct SgEsc_t gEscSequences[]
#define mxARG
static void HtmlHashInit(void)
#define ESC_HASH_SIZE
static void EscInit()
@ Html_COMMENT
@ Html_TEXTAREA
@ Html_XMP
@ Html_STYLE
@ Html_SCRIPT
@ Html_LI
@ Html_LISTING
@ Html_TABLE
@ Html_EndUL
@ Html_EndOL
@ Html_NOFRAMES
@ Html_PLAINTEXT
@ Html_Block
@ Html_Space
@ Html_Text
@ Html_A
@ Html_NOSCRIPT
@ Html_EndLI
@ Html_EndXMP
@ Html_BR
@ Html_Unknown
@ Html_EndCOMMENT
#define HTML_MARKUP_HASH_SIZE
#define HTML_MARKUP_COUNT
#define O_HtmlInput
Definition TGHtml.h:857
#define O_HtmlHr
Definition TGHtml.h:859
#define O_HtmlTable
Definition TGHtml.h:852
#define UNTESTED
Definition TGHtml.h:64
#define HTML_NewLine
Definition TGHtml.h:275
#define O_HtmlImageMarkup
Definition TGHtml.h:856
short Html_16_t
Definition TGHtml.h:136
#define O_HtmlAnchor
Definition TGHtml.h:860
#define O_HtmlLi
Definition TGHtml.h:854
#define O_HtmlMapArea
Definition TGHtml.h:862
#define O_HtmlRef
Definition TGHtml.h:853
#define O_HtmlCell
Definition TGHtml.h:851
#define O_HtmlScript
Definition TGHtml.h:861
#define O_HtmlListStart
Definition TGHtml.h:855
#define O_HtmlForm
Definition TGHtml.h:858
int type
Definition TGX11.cxx:121
#define snprintf
Definition civetweb.c:1540
Html_u16_t fN
Definition TGHtml.h:719
Html_u16_t fRight
Definition TGHtml.h:718
char * fZ
Definition TGHtml.h:716
Html_u16_t fLeft
Definition TGHtml.h:718
int fBottom
Definition TGHtml.h:717
Html_u8_t fFlags
Definition TGHtml.h:265
Html_u8_t fType
Definition TGHtml.h:264
SHtmlStyle_t fStyle
Definition TGHtml.h:263
TGHtmlElement * fPPrev
Definition TGHtml.h:262
Html_16_t fCount
Definition TGHtml.h:266
TGHtmlElement * fPNext
Definition TGHtml.h:261
int fNStart
Definition TGHtml.h:683
int fNScript
Definition TGHtml.h:684
Html_16_t fW
Definition TGHtml.h:308
int fAddEndTags
Definition TGHtml.h:1245
TGString * ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
Return all tokens between the two elements as a string list.
int fICol
Definition TGHtml.h:1173
void AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
Insert token pNew before token p.
int fIdind
Definition TGHtml.h:1270
int TextInsertCmd(int argc, char **argv)
Insert text into text token, or break token into two text tokens.
void TokenizerAppend(const char *text)
Append text to the tokenizer engine.
TGHtmlElement * fPFirst
Definition TGHtml.h:1128
void AddStyle(TGHtmlElement *p)
This routine adds information to the input texts that doesn't change when the display is resized or w...
virtual char * ProcessScript(TGHtmlScript *)
Definition TGHtml.h:957
int fNToken
Definition TGHtml.h:1130
TGHtmlElement * InsertToken(TGHtmlElement *pToken, char *zType, char *zArgs, int offs)
This routine takes a text representation of a token, converts it into an TGHtmlElement object and ins...
const char * TypeToName(int type)
Convert a type into a symbolic name.
int Tokenize()
Process as much of the input HTML as possible.
int NameToType(char *zType)
Convert a markup name into a type integer.
int fIPlaintext
Definition TGHtml.h:1175
void AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
Append all the arguments of the given markup to the given TGString.
char * fZText
Definition TGHtml.h:1168
int fNAlloc
Definition TGHtml.h:1170
void AppendElement(TGHtmlElement *pElem)
Append the given TGHtmlElement to the tokenizers list of elements.
virtual int ProcessToken(TGHtmlElement *, const char *, int)
Definition TGHtml.h:927
TGHtmlScript * fPScript
Definition TGHtml.h:1179
int fHasFrames
Definition TGHtml.h:1244
TGHtmlMarkupElement * MakeMarkupEntry(int objType, int type, int argc, int arglen[], char *argv[])
Make one markup entry.
TGHtmlElement * fPLast
Definition TGHtml.h:1129
SHtmlTokenMap_t * GetMarkupMap(int n)
Returns token map at location n.
int fNComplete
Definition TGHtml.h:1171
SHtmlTokenMap_t * NameToPmap(char *zType)
Returns token map matching zType name.
char * DumpToken(TGHtmlElement *p)
For debugging purposes, print information about a token.
char * GetTokenName(TGHtmlElement *p)
Returns token name of html element p.
int GetIndex(const char *zIndex, TGHtmlElement **ppToken, int *pIndex)
This routine decodes a complete index specification.
int fHasScript
Definition TGHtml.h:1243
void PrintList(TGHtmlElement *first, TGHtmlElement *last)
Print a list of tokens.
void AddFormInfo(TGHtmlElement *p)
Add the DOM control information for form elements.
int fNText
Definition TGHtml.h:1169
TString & Append(const char *cs)
Definition TString.h:564
TText * text
const Int_t n
Definition legend1.C:16
Definition first.py:1
const char * cnt
Definition TXMLSetup.cxx:75
unsigned int fColor
Definition TGHtml.h:145
unsigned int fAlign
Definition TGHtml.h:147
unsigned int fFont
Definition TGHtml.h:144
unsigned int fFlags
Definition TGHtml.h:150
Html_16_t fObjType
Definition TGHtml.h:842
Html_16_t fType
Definition TGHtml.h:841
SHtmlTokenMap_t * fPCollide
Definition TGHtml.h:843
const char * fZName
Definition TGHtml.h:840
auto * l
Definition textangle.C:4
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345