1*7c568831SAndroid Build Coastguard Worker /*
2*7c568831SAndroid Build Coastguard Worker * HTMLparser.c : an HTML parser
3*7c568831SAndroid Build Coastguard Worker *
4*7c568831SAndroid Build Coastguard Worker * References:
5*7c568831SAndroid Build Coastguard Worker * HTML Living Standard
6*7c568831SAndroid Build Coastguard Worker * https://html.spec.whatwg.org/multipage/parsing.html
7*7c568831SAndroid Build Coastguard Worker *
8*7c568831SAndroid Build Coastguard Worker * Tokenization now conforms to HTML5. Tree construction still follows
9*7c568831SAndroid Build Coastguard Worker * a custom, non-standard implementation. See:
10*7c568831SAndroid Build Coastguard Worker *
11*7c568831SAndroid Build Coastguard Worker * https://gitlab.gnome.org/GNOME/libxml2/-/issues/211
12*7c568831SAndroid Build Coastguard Worker *
13*7c568831SAndroid Build Coastguard Worker * See Copyright for the status of this software.
14*7c568831SAndroid Build Coastguard Worker *
15*7c568831SAndroid Build Coastguard Worker * [email protected]
16*7c568831SAndroid Build Coastguard Worker */
17*7c568831SAndroid Build Coastguard Worker
18*7c568831SAndroid Build Coastguard Worker #define IN_LIBXML
19*7c568831SAndroid Build Coastguard Worker #include "libxml.h"
20*7c568831SAndroid Build Coastguard Worker #ifdef LIBXML_HTML_ENABLED
21*7c568831SAndroid Build Coastguard Worker
22*7c568831SAndroid Build Coastguard Worker #include <string.h>
23*7c568831SAndroid Build Coastguard Worker #include <ctype.h>
24*7c568831SAndroid Build Coastguard Worker #include <stdlib.h>
25*7c568831SAndroid Build Coastguard Worker
26*7c568831SAndroid Build Coastguard Worker #include <libxml/HTMLparser.h>
27*7c568831SAndroid Build Coastguard Worker #include <libxml/xmlmemory.h>
28*7c568831SAndroid Build Coastguard Worker #include <libxml/tree.h>
29*7c568831SAndroid Build Coastguard Worker #include <libxml/parser.h>
30*7c568831SAndroid Build Coastguard Worker #include <libxml/parserInternals.h>
31*7c568831SAndroid Build Coastguard Worker #include <libxml/xmlerror.h>
32*7c568831SAndroid Build Coastguard Worker #include <libxml/HTMLtree.h>
33*7c568831SAndroid Build Coastguard Worker #include <libxml/entities.h>
34*7c568831SAndroid Build Coastguard Worker #include <libxml/encoding.h>
35*7c568831SAndroid Build Coastguard Worker #include <libxml/xmlIO.h>
36*7c568831SAndroid Build Coastguard Worker #include <libxml/uri.h>
37*7c568831SAndroid Build Coastguard Worker
38*7c568831SAndroid Build Coastguard Worker #include "private/buf.h"
39*7c568831SAndroid Build Coastguard Worker #include "private/dict.h"
40*7c568831SAndroid Build Coastguard Worker #include "private/enc.h"
41*7c568831SAndroid Build Coastguard Worker #include "private/error.h"
42*7c568831SAndroid Build Coastguard Worker #include "private/html.h"
43*7c568831SAndroid Build Coastguard Worker #include "private/io.h"
44*7c568831SAndroid Build Coastguard Worker #include "private/parser.h"
45*7c568831SAndroid Build Coastguard Worker #include "private/tree.h"
46*7c568831SAndroid Build Coastguard Worker
/* Numeric size limits; their users are outside this part of the file. */
#define HTML_MAX_NAMELEN 1000
#define HTML_PARSER_BIG_BUFFER_SIZE 1000
#define HTML_PARSER_BUFFER_SIZE 100

/*
 * Character classification helpers. They are plain macros, so the
 * argument may be evaluated more than once: pass side-effect free
 * expressions only.
 */

/* True for TAB, LF, FF, CR and SPACE. Vertical tab (0x0B) is excluded. */
#define IS_WS_HTML(c) \
    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0C) || \
     ((c) == 0x0D) || ((c) == 0x20))

/* True for 0-9, a-f and A-F; OR-ing with 0x20 folds ASCII letters to
 * lower case before the range test. */
#define IS_HEX_DIGIT(c) \
    ((IS_ASCII_DIGIT(c)) || \
     ((((c) | 0x20) >= 'a') && (((c) | 0x20) <= 'f')))

/* True for the ASCII upper-case letters A-Z. */
#define IS_UPPER(c) \
    (((c) >= 'A') && ((c) <= 'Z'))

/* True for ASCII letters and digits. */
#define IS_ALNUM(c) \
    (IS_ASCII_LETTER(c) || IS_ASCII_DIGIT(c))
64*7c568831SAndroid Build Coastguard Worker
/*
 * Bit set over the code points 0..63: element 0 holds one bit per code
 * point 0..31, element 1 one bit per code point 32..63 (bit index is the
 * code point minus 32). Queried with htmlMaskMatch().
 */
typedef const unsigned htmlAsciiMask[2];

/* '"' */
static htmlAsciiMask MASK_DQ = { 0, 1u << ('"' - 32) };

/* '\'' */
static htmlAsciiMask MASK_SQ = { 0, 1u << ('\'' - 32) };

/* '>' */
static htmlAsciiMask MASK_GT = { 0, 1u << ('>' - 32) };

/* '-' */
static htmlAsciiMask MASK_DASH = { 0, 1u << ('-' - 32) };

/* TAB, LF, FF, CR, SPACE and '>' */
static htmlAsciiMask MASK_WS_GT = {
    (1u << '\t') | (1u << '\n') | (1u << '\f') | (1u << '\r'),
    (1u << (' ' - 32)) | (1u << ('>' - 32))
};

/* '"' and '>' */
static htmlAsciiMask MASK_DQ_GT = {
    0,
    (1u << ('"' - 32)) | (1u << ('>' - 32))
};

/* '\'' and '>' */
static htmlAsciiMask MASK_SQ_GT = {
    0,
    (1u << ('\'' - 32)) | (1u << ('>' - 32))
};
95*7c568831SAndroid Build Coastguard Worker
96*7c568831SAndroid Build Coastguard Worker static int htmlOmittedDefaultValue = 1;
97*7c568831SAndroid Build Coastguard Worker
98*7c568831SAndroid Build Coastguard Worker static int
99*7c568831SAndroid Build Coastguard Worker htmlParseElementInternal(htmlParserCtxtPtr ctxt);
100*7c568831SAndroid Build Coastguard Worker
101*7c568831SAndroid Build Coastguard Worker /************************************************************************
102*7c568831SAndroid Build Coastguard Worker * *
103*7c568831SAndroid Build Coastguard Worker * Some factorized error routines *
104*7c568831SAndroid Build Coastguard Worker * *
105*7c568831SAndroid Build Coastguard Worker ************************************************************************/
106*7c568831SAndroid Build Coastguard Worker
107*7c568831SAndroid Build Coastguard Worker /**
108*7c568831SAndroid Build Coastguard Worker * htmlErrMemory:
109*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
110*7c568831SAndroid Build Coastguard Worker * @extra: extra information
111*7c568831SAndroid Build Coastguard Worker *
112*7c568831SAndroid Build Coastguard Worker * Handle a redefinition of attribute error
113*7c568831SAndroid Build Coastguard Worker */
114*7c568831SAndroid Build Coastguard Worker static void
htmlErrMemory(xmlParserCtxtPtr ctxt)115*7c568831SAndroid Build Coastguard Worker htmlErrMemory(xmlParserCtxtPtr ctxt)
116*7c568831SAndroid Build Coastguard Worker {
117*7c568831SAndroid Build Coastguard Worker xmlCtxtErrMemory(ctxt);
118*7c568831SAndroid Build Coastguard Worker }
119*7c568831SAndroid Build Coastguard Worker
120*7c568831SAndroid Build Coastguard Worker /**
121*7c568831SAndroid Build Coastguard Worker * htmlParseErr:
122*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
123*7c568831SAndroid Build Coastguard Worker * @error: the error number
124*7c568831SAndroid Build Coastguard Worker * @msg: the error message
125*7c568831SAndroid Build Coastguard Worker * @str1: string infor
126*7c568831SAndroid Build Coastguard Worker * @str2: string infor
127*7c568831SAndroid Build Coastguard Worker *
128*7c568831SAndroid Build Coastguard Worker * Handle a fatal parser error, i.e. violating Well-Formedness constraints
129*7c568831SAndroid Build Coastguard Worker */
130*7c568831SAndroid Build Coastguard Worker static void LIBXML_ATTR_FORMAT(3,0)
htmlParseErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)131*7c568831SAndroid Build Coastguard Worker htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
132*7c568831SAndroid Build Coastguard Worker const char *msg, const xmlChar *str1, const xmlChar *str2)
133*7c568831SAndroid Build Coastguard Worker {
134*7c568831SAndroid Build Coastguard Worker xmlCtxtErr(ctxt, NULL, XML_FROM_HTML, error, XML_ERR_ERROR,
135*7c568831SAndroid Build Coastguard Worker str1, str2, NULL, 0, msg, str1, str2);
136*7c568831SAndroid Build Coastguard Worker }
137*7c568831SAndroid Build Coastguard Worker
138*7c568831SAndroid Build Coastguard Worker /************************************************************************
139*7c568831SAndroid Build Coastguard Worker * *
140*7c568831SAndroid Build Coastguard Worker * Parser stacks related functions and macros *
141*7c568831SAndroid Build Coastguard Worker * *
142*7c568831SAndroid Build Coastguard Worker ************************************************************************/
143*7c568831SAndroid Build Coastguard Worker
144*7c568831SAndroid Build Coastguard Worker /**
145*7c568831SAndroid Build Coastguard Worker * htmlnamePush:
146*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
147*7c568831SAndroid Build Coastguard Worker * @value: the element name
148*7c568831SAndroid Build Coastguard Worker *
149*7c568831SAndroid Build Coastguard Worker * Pushes a new element name on top of the name stack
150*7c568831SAndroid Build Coastguard Worker *
151*7c568831SAndroid Build Coastguard Worker * Returns -1 in case of error, the index in the stack otherwise
152*7c568831SAndroid Build Coastguard Worker */
153*7c568831SAndroid Build Coastguard Worker static int
htmlnamePush(htmlParserCtxtPtr ctxt,const xmlChar * value)154*7c568831SAndroid Build Coastguard Worker htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
155*7c568831SAndroid Build Coastguard Worker {
156*7c568831SAndroid Build Coastguard Worker if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head")))
157*7c568831SAndroid Build Coastguard Worker ctxt->html = 3;
158*7c568831SAndroid Build Coastguard Worker if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body")))
159*7c568831SAndroid Build Coastguard Worker ctxt->html = 10;
160*7c568831SAndroid Build Coastguard Worker if (ctxt->nameNr >= ctxt->nameMax) {
161*7c568831SAndroid Build Coastguard Worker size_t newSize = ctxt->nameMax * 2;
162*7c568831SAndroid Build Coastguard Worker const xmlChar **tmp;
163*7c568831SAndroid Build Coastguard Worker
164*7c568831SAndroid Build Coastguard Worker tmp = xmlRealloc((xmlChar **) ctxt->nameTab,
165*7c568831SAndroid Build Coastguard Worker newSize * sizeof(ctxt->nameTab[0]));
166*7c568831SAndroid Build Coastguard Worker if (tmp == NULL) {
167*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
168*7c568831SAndroid Build Coastguard Worker return (-1);
169*7c568831SAndroid Build Coastguard Worker }
170*7c568831SAndroid Build Coastguard Worker ctxt->nameTab = tmp;
171*7c568831SAndroid Build Coastguard Worker ctxt->nameMax = newSize;
172*7c568831SAndroid Build Coastguard Worker }
173*7c568831SAndroid Build Coastguard Worker ctxt->nameTab[ctxt->nameNr] = value;
174*7c568831SAndroid Build Coastguard Worker ctxt->name = value;
175*7c568831SAndroid Build Coastguard Worker return (ctxt->nameNr++);
176*7c568831SAndroid Build Coastguard Worker }
177*7c568831SAndroid Build Coastguard Worker /**
178*7c568831SAndroid Build Coastguard Worker * htmlnamePop:
179*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
180*7c568831SAndroid Build Coastguard Worker *
181*7c568831SAndroid Build Coastguard Worker * Pops the top element name from the name stack
182*7c568831SAndroid Build Coastguard Worker *
183*7c568831SAndroid Build Coastguard Worker * Returns the name just removed
184*7c568831SAndroid Build Coastguard Worker */
185*7c568831SAndroid Build Coastguard Worker static const xmlChar *
htmlnamePop(htmlParserCtxtPtr ctxt)186*7c568831SAndroid Build Coastguard Worker htmlnamePop(htmlParserCtxtPtr ctxt)
187*7c568831SAndroid Build Coastguard Worker {
188*7c568831SAndroid Build Coastguard Worker const xmlChar *ret;
189*7c568831SAndroid Build Coastguard Worker
190*7c568831SAndroid Build Coastguard Worker if (ctxt->nameNr <= 0)
191*7c568831SAndroid Build Coastguard Worker return (NULL);
192*7c568831SAndroid Build Coastguard Worker ctxt->nameNr--;
193*7c568831SAndroid Build Coastguard Worker if (ctxt->nameNr < 0)
194*7c568831SAndroid Build Coastguard Worker return (NULL);
195*7c568831SAndroid Build Coastguard Worker if (ctxt->nameNr > 0)
196*7c568831SAndroid Build Coastguard Worker ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
197*7c568831SAndroid Build Coastguard Worker else
198*7c568831SAndroid Build Coastguard Worker ctxt->name = NULL;
199*7c568831SAndroid Build Coastguard Worker ret = ctxt->nameTab[ctxt->nameNr];
200*7c568831SAndroid Build Coastguard Worker ctxt->nameTab[ctxt->nameNr] = NULL;
201*7c568831SAndroid Build Coastguard Worker return (ret);
202*7c568831SAndroid Build Coastguard Worker }
203*7c568831SAndroid Build Coastguard Worker
204*7c568831SAndroid Build Coastguard Worker /**
205*7c568831SAndroid Build Coastguard Worker * htmlNodeInfoPush:
206*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
207*7c568831SAndroid Build Coastguard Worker * @value: the node info
208*7c568831SAndroid Build Coastguard Worker *
209*7c568831SAndroid Build Coastguard Worker * Pushes a new element name on top of the node info stack
210*7c568831SAndroid Build Coastguard Worker *
211*7c568831SAndroid Build Coastguard Worker * Returns 0 in case of error, the index in the stack otherwise
212*7c568831SAndroid Build Coastguard Worker */
213*7c568831SAndroid Build Coastguard Worker static int
htmlNodeInfoPush(htmlParserCtxtPtr ctxt,htmlParserNodeInfo * value)214*7c568831SAndroid Build Coastguard Worker htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value)
215*7c568831SAndroid Build Coastguard Worker {
216*7c568831SAndroid Build Coastguard Worker if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {
217*7c568831SAndroid Build Coastguard Worker if (ctxt->nodeInfoMax == 0)
218*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfoMax = 5;
219*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfoMax *= 2;
220*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfoTab = (htmlParserNodeInfo *)
221*7c568831SAndroid Build Coastguard Worker xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,
222*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfoMax *
223*7c568831SAndroid Build Coastguard Worker sizeof(ctxt->nodeInfoTab[0]));
224*7c568831SAndroid Build Coastguard Worker if (ctxt->nodeInfoTab == NULL) {
225*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
226*7c568831SAndroid Build Coastguard Worker return (0);
227*7c568831SAndroid Build Coastguard Worker }
228*7c568831SAndroid Build Coastguard Worker }
229*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value;
230*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
231*7c568831SAndroid Build Coastguard Worker return (ctxt->nodeInfoNr++);
232*7c568831SAndroid Build Coastguard Worker }
233*7c568831SAndroid Build Coastguard Worker
234*7c568831SAndroid Build Coastguard Worker /**
235*7c568831SAndroid Build Coastguard Worker * htmlNodeInfoPop:
236*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
237*7c568831SAndroid Build Coastguard Worker *
238*7c568831SAndroid Build Coastguard Worker * Pops the top element name from the node info stack
239*7c568831SAndroid Build Coastguard Worker *
240*7c568831SAndroid Build Coastguard Worker * Returns 0 in case of error, the pointer to NodeInfo otherwise
241*7c568831SAndroid Build Coastguard Worker */
242*7c568831SAndroid Build Coastguard Worker static htmlParserNodeInfo *
htmlNodeInfoPop(htmlParserCtxtPtr ctxt)243*7c568831SAndroid Build Coastguard Worker htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
244*7c568831SAndroid Build Coastguard Worker {
245*7c568831SAndroid Build Coastguard Worker if (ctxt->nodeInfoNr <= 0)
246*7c568831SAndroid Build Coastguard Worker return (NULL);
247*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfoNr--;
248*7c568831SAndroid Build Coastguard Worker if (ctxt->nodeInfoNr < 0)
249*7c568831SAndroid Build Coastguard Worker return (NULL);
250*7c568831SAndroid Build Coastguard Worker if (ctxt->nodeInfoNr > 0)
251*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1];
252*7c568831SAndroid Build Coastguard Worker else
253*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfo = NULL;
254*7c568831SAndroid Build Coastguard Worker return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
255*7c568831SAndroid Build Coastguard Worker }
256*7c568831SAndroid Build Coastguard Worker
/*
 * Macros for accessing the input. They are private to the parser and all
 * of them expect a variable named `ctxt` to be in scope.
 *
 * Byte-level accessors. These look at raw bytes of the input, so they
 * should only be used to compare against or step over ASCII text:
 *
 *   CUR       the byte at the current position
 *   CUR_PTR   the current position pointer
 *   BASE_PTR  the base pointer of the current input
 *   NXT(n)    the byte n positions after the current one
 *   UPPER     toupper() of the current byte
 *   UPP(n)    toupper() of the byte n positions after the current one
 *   SKIP(n)   advance the position and the column counter by n; the line
 *             counter is not touched, so do not skip over newlines
 *
 * Buffer management:
 *
 *   SHRINK    unless the parser is progressive, call xmlParserShrink()
 *             once more than 2 * INPUT_CHUNK bytes lie before the current
 *             position and fewer than 2 * INPUT_CHUNK bytes lie after it
 *   GROW      unless the parser is progressive, call xmlParserGrow() when
 *             fewer than INPUT_CHUNK bytes remain
 *
 * SHRINK and GROW expand to a complete `if` statement including its
 * semicolon. Use them as stand-alone statements or inside braces, never
 * as the unbraced body of an if/else.
 *
 *   SKIP_BLANKS  call htmlSkipBlankChars(ctxt)
 */

#define CUR (*ctxt->input->cur)

#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define NXT(val) ctxt->input->cur[(val)]

#define UPPER (toupper(*ctxt->input->cur))

#define UPP(val) (toupper(ctxt->input->cur[(val)]))

#define SKIP(val) ctxt->input->cur += (val),ctxt->input->col+=(val)

#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS htmlSkipBlankChars(ctxt)
308*7c568831SAndroid Build Coastguard Worker
309*7c568831SAndroid Build Coastguard Worker /**
310*7c568831SAndroid Build Coastguard Worker * htmlFindEncoding:
311*7c568831SAndroid Build Coastguard Worker * @the HTML parser context
312*7c568831SAndroid Build Coastguard Worker *
313*7c568831SAndroid Build Coastguard Worker * Ty to find and encoding in the current data available in the input
314*7c568831SAndroid Build Coastguard Worker * buffer this is needed to try to switch to the proper encoding when
315*7c568831SAndroid Build Coastguard Worker * one face a character error.
316*7c568831SAndroid Build Coastguard Worker * That's an heuristic, since it's operating outside of parsing it could
317*7c568831SAndroid Build Coastguard Worker * try to use a meta which had been commented out, that's the reason it
318*7c568831SAndroid Build Coastguard Worker * should only be used in case of error, not as a default.
319*7c568831SAndroid Build Coastguard Worker *
320*7c568831SAndroid Build Coastguard Worker * Returns an encoding string or NULL if not found, the string need to
321*7c568831SAndroid Build Coastguard Worker * be freed
322*7c568831SAndroid Build Coastguard Worker */
323*7c568831SAndroid Build Coastguard Worker static xmlChar *
htmlFindEncoding(xmlParserCtxtPtr ctxt)324*7c568831SAndroid Build Coastguard Worker htmlFindEncoding(xmlParserCtxtPtr ctxt) {
325*7c568831SAndroid Build Coastguard Worker const xmlChar *start, *cur, *end;
326*7c568831SAndroid Build Coastguard Worker xmlChar *ret;
327*7c568831SAndroid Build Coastguard Worker
328*7c568831SAndroid Build Coastguard Worker if ((ctxt == NULL) || (ctxt->input == NULL) ||
329*7c568831SAndroid Build Coastguard Worker (ctxt->input->flags & XML_INPUT_HAS_ENCODING))
330*7c568831SAndroid Build Coastguard Worker return(NULL);
331*7c568831SAndroid Build Coastguard Worker if ((ctxt->input->cur == NULL) || (ctxt->input->end == NULL))
332*7c568831SAndroid Build Coastguard Worker return(NULL);
333*7c568831SAndroid Build Coastguard Worker
334*7c568831SAndroid Build Coastguard Worker start = ctxt->input->cur;
335*7c568831SAndroid Build Coastguard Worker end = ctxt->input->end;
336*7c568831SAndroid Build Coastguard Worker /* we also expect the input buffer to be zero terminated */
337*7c568831SAndroid Build Coastguard Worker if (*end != 0)
338*7c568831SAndroid Build Coastguard Worker return(NULL);
339*7c568831SAndroid Build Coastguard Worker
340*7c568831SAndroid Build Coastguard Worker cur = xmlStrcasestr(start, BAD_CAST "HTTP-EQUIV");
341*7c568831SAndroid Build Coastguard Worker if (cur == NULL)
342*7c568831SAndroid Build Coastguard Worker return(NULL);
343*7c568831SAndroid Build Coastguard Worker cur = xmlStrcasestr(cur, BAD_CAST "CONTENT");
344*7c568831SAndroid Build Coastguard Worker if (cur == NULL)
345*7c568831SAndroid Build Coastguard Worker return(NULL);
346*7c568831SAndroid Build Coastguard Worker cur = xmlStrcasestr(cur, BAD_CAST "CHARSET=");
347*7c568831SAndroid Build Coastguard Worker if (cur == NULL)
348*7c568831SAndroid Build Coastguard Worker return(NULL);
349*7c568831SAndroid Build Coastguard Worker cur += 8;
350*7c568831SAndroid Build Coastguard Worker start = cur;
351*7c568831SAndroid Build Coastguard Worker while ((IS_ALNUM(*cur)) ||
352*7c568831SAndroid Build Coastguard Worker (*cur == '-') || (*cur == '_') || (*cur == ':') || (*cur == '/'))
353*7c568831SAndroid Build Coastguard Worker cur++;
354*7c568831SAndroid Build Coastguard Worker if (cur == start)
355*7c568831SAndroid Build Coastguard Worker return(NULL);
356*7c568831SAndroid Build Coastguard Worker ret = xmlStrndup(start, cur - start);
357*7c568831SAndroid Build Coastguard Worker if (ret == NULL)
358*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
359*7c568831SAndroid Build Coastguard Worker return(ret);
360*7c568831SAndroid Build Coastguard Worker }
361*7c568831SAndroid Build Coastguard Worker
362*7c568831SAndroid Build Coastguard Worker static int
htmlMaskMatch(htmlAsciiMask mask,unsigned c)363*7c568831SAndroid Build Coastguard Worker htmlMaskMatch(htmlAsciiMask mask, unsigned c) {
364*7c568831SAndroid Build Coastguard Worker if (c >= 64)
365*7c568831SAndroid Build Coastguard Worker return(0);
366*7c568831SAndroid Build Coastguard Worker return((mask[c/32] >> (c & 31)) & 1);
367*7c568831SAndroid Build Coastguard Worker }
368*7c568831SAndroid Build Coastguard Worker
369*7c568831SAndroid Build Coastguard Worker static int
htmlValidateUtf8(xmlParserCtxtPtr ctxt,const xmlChar * str,size_t len)370*7c568831SAndroid Build Coastguard Worker htmlValidateUtf8(xmlParserCtxtPtr ctxt, const xmlChar *str, size_t len) {
371*7c568831SAndroid Build Coastguard Worker unsigned c = str[0];
372*7c568831SAndroid Build Coastguard Worker int size;
373*7c568831SAndroid Build Coastguard Worker
374*7c568831SAndroid Build Coastguard Worker if (c < 0xC2) {
375*7c568831SAndroid Build Coastguard Worker goto invalid;
376*7c568831SAndroid Build Coastguard Worker } else if (c < 0xE0) {
377*7c568831SAndroid Build Coastguard Worker if (len < 2)
378*7c568831SAndroid Build Coastguard Worker goto incomplete;
379*7c568831SAndroid Build Coastguard Worker if ((str[1] & 0xC0) != 0x80)
380*7c568831SAndroid Build Coastguard Worker goto invalid;
381*7c568831SAndroid Build Coastguard Worker size = 2;
382*7c568831SAndroid Build Coastguard Worker } else if (c < 0xF0) {
383*7c568831SAndroid Build Coastguard Worker unsigned v;
384*7c568831SAndroid Build Coastguard Worker
385*7c568831SAndroid Build Coastguard Worker if (len < 3)
386*7c568831SAndroid Build Coastguard Worker goto incomplete;
387*7c568831SAndroid Build Coastguard Worker
388*7c568831SAndroid Build Coastguard Worker v = str[1] << 8 | str[2]; /* hint to generate 16-bit load */
389*7c568831SAndroid Build Coastguard Worker v |= c << 16;
390*7c568831SAndroid Build Coastguard Worker
391*7c568831SAndroid Build Coastguard Worker if (((v & 0x00C0C0) != 0x008080) ||
392*7c568831SAndroid Build Coastguard Worker ((v & 0x0F2000) == 0x000000) ||
393*7c568831SAndroid Build Coastguard Worker ((v & 0x0F2000) == 0x0D2000))
394*7c568831SAndroid Build Coastguard Worker goto invalid;
395*7c568831SAndroid Build Coastguard Worker
396*7c568831SAndroid Build Coastguard Worker size = 3;
397*7c568831SAndroid Build Coastguard Worker } else {
398*7c568831SAndroid Build Coastguard Worker unsigned v;
399*7c568831SAndroid Build Coastguard Worker
400*7c568831SAndroid Build Coastguard Worker if (len < 4)
401*7c568831SAndroid Build Coastguard Worker goto incomplete;
402*7c568831SAndroid Build Coastguard Worker
403*7c568831SAndroid Build Coastguard Worker v = c << 24 | str[1] << 16 | str[2] << 8 | str[3];
404*7c568831SAndroid Build Coastguard Worker
405*7c568831SAndroid Build Coastguard Worker if (((v & 0x00C0C0C0) != 0x00808080) ||
406*7c568831SAndroid Build Coastguard Worker (v < 0xF0900000) || (v >= 0xF4900000))
407*7c568831SAndroid Build Coastguard Worker goto invalid;
408*7c568831SAndroid Build Coastguard Worker
409*7c568831SAndroid Build Coastguard Worker size = 4;
410*7c568831SAndroid Build Coastguard Worker }
411*7c568831SAndroid Build Coastguard Worker
412*7c568831SAndroid Build Coastguard Worker return(size);
413*7c568831SAndroid Build Coastguard Worker
414*7c568831SAndroid Build Coastguard Worker incomplete:
415*7c568831SAndroid Build Coastguard Worker return(0);
416*7c568831SAndroid Build Coastguard Worker
417*7c568831SAndroid Build Coastguard Worker invalid:
418*7c568831SAndroid Build Coastguard Worker /* Only report the first error */
419*7c568831SAndroid Build Coastguard Worker if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
420*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
421*7c568831SAndroid Build Coastguard Worker "Invalid bytes in character encoding", NULL, NULL);
422*7c568831SAndroid Build Coastguard Worker ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
423*7c568831SAndroid Build Coastguard Worker }
424*7c568831SAndroid Build Coastguard Worker
425*7c568831SAndroid Build Coastguard Worker return(-1);
426*7c568831SAndroid Build Coastguard Worker }
427*7c568831SAndroid Build Coastguard Worker
428*7c568831SAndroid Build Coastguard Worker /**
429*7c568831SAndroid Build Coastguard Worker * htmlSkipBlankChars:
430*7c568831SAndroid Build Coastguard Worker * @ctxt: the HTML parser context
431*7c568831SAndroid Build Coastguard Worker *
432*7c568831SAndroid Build Coastguard Worker * skip all blanks character found at that point in the input streams.
433*7c568831SAndroid Build Coastguard Worker *
434*7c568831SAndroid Build Coastguard Worker * Returns the number of space chars skipped
435*7c568831SAndroid Build Coastguard Worker */
436*7c568831SAndroid Build Coastguard Worker
437*7c568831SAndroid Build Coastguard Worker static int
htmlSkipBlankChars(xmlParserCtxtPtr ctxt)438*7c568831SAndroid Build Coastguard Worker htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
439*7c568831SAndroid Build Coastguard Worker const xmlChar *cur = ctxt->input->cur;
440*7c568831SAndroid Build Coastguard Worker size_t avail = ctxt->input->end - cur;
441*7c568831SAndroid Build Coastguard Worker int res = 0;
442*7c568831SAndroid Build Coastguard Worker int line = ctxt->input->line;
443*7c568831SAndroid Build Coastguard Worker int col = ctxt->input->col;
444*7c568831SAndroid Build Coastguard Worker
445*7c568831SAndroid Build Coastguard Worker while (!PARSER_STOPPED(ctxt)) {
446*7c568831SAndroid Build Coastguard Worker if (avail == 0) {
447*7c568831SAndroid Build Coastguard Worker ctxt->input->cur = cur;
448*7c568831SAndroid Build Coastguard Worker GROW;
449*7c568831SAndroid Build Coastguard Worker cur = ctxt->input->cur;
450*7c568831SAndroid Build Coastguard Worker avail = ctxt->input->end - cur;
451*7c568831SAndroid Build Coastguard Worker
452*7c568831SAndroid Build Coastguard Worker if (avail == 0)
453*7c568831SAndroid Build Coastguard Worker break;
454*7c568831SAndroid Build Coastguard Worker }
455*7c568831SAndroid Build Coastguard Worker
456*7c568831SAndroid Build Coastguard Worker if (*cur == '\n') {
457*7c568831SAndroid Build Coastguard Worker line++;
458*7c568831SAndroid Build Coastguard Worker col = 1;
459*7c568831SAndroid Build Coastguard Worker } else if (IS_WS_HTML(*cur)) {
460*7c568831SAndroid Build Coastguard Worker col++;
461*7c568831SAndroid Build Coastguard Worker } else {
462*7c568831SAndroid Build Coastguard Worker break;
463*7c568831SAndroid Build Coastguard Worker }
464*7c568831SAndroid Build Coastguard Worker
465*7c568831SAndroid Build Coastguard Worker cur += 1;
466*7c568831SAndroid Build Coastguard Worker avail -= 1;
467*7c568831SAndroid Build Coastguard Worker
468*7c568831SAndroid Build Coastguard Worker if (res < INT_MAX)
469*7c568831SAndroid Build Coastguard Worker res++;
470*7c568831SAndroid Build Coastguard Worker }
471*7c568831SAndroid Build Coastguard Worker
472*7c568831SAndroid Build Coastguard Worker ctxt->input->cur = cur;
473*7c568831SAndroid Build Coastguard Worker ctxt->input->line = line;
474*7c568831SAndroid Build Coastguard Worker ctxt->input->col = col;
475*7c568831SAndroid Build Coastguard Worker
476*7c568831SAndroid Build Coastguard Worker if (res > 8)
477*7c568831SAndroid Build Coastguard Worker GROW;
478*7c568831SAndroid Build Coastguard Worker
479*7c568831SAndroid Build Coastguard Worker return(res);
480*7c568831SAndroid Build Coastguard Worker }
481*7c568831SAndroid Build Coastguard Worker
482*7c568831SAndroid Build Coastguard Worker
483*7c568831SAndroid Build Coastguard Worker
484*7c568831SAndroid Build Coastguard Worker /************************************************************************
485*7c568831SAndroid Build Coastguard Worker * *
486*7c568831SAndroid Build Coastguard Worker * The list of HTML elements and their properties *
487*7c568831SAndroid Build Coastguard Worker * *
488*7c568831SAndroid Build Coastguard Worker ************************************************************************/
489*7c568831SAndroid Build Coastguard Worker
/*
 * Start Tag: 1 means the start tag can be omitted
 * End Tag:   1 means the end tag can be omitted
 *            2 means it's forbidden (empty elements)
 *            3 means the tag is stylistic and should be closed easily
 * Depr:      this element is deprecated
 * DTD:       1 means that this element is valid only in the Loose DTD
 *            2 means that this element is valid only in the Frameset DTD
 *
 * Leading fields of each table entry, in order:
 * Name, Start Tag, End Tag, Save End, Empty, Deprecated, DTD, inline, Description
 *
 * Each entry then continues with five NULL initializers and ends with a
 * data mode value: 0 or one of the DATA_* constants defined below.
 */
501*7c568831SAndroid Build Coastguard Worker
/*
 * Data modes. html40ElementTable stores one of these in the last field of
 * an entry (0 there means no special mode): "title" and "textarea" use
 * DATA_RCDATA, "iframe", "noembed", "noframes", "style" and "xmp" use
 * DATA_RAWTEXT, "plaintext" uses DATA_PLAINTEXT and "script" uses
 * DATA_SCRIPT.
 *
 * NOTE(review): DATA_SCRIPT_ESC1 and DATA_SCRIPT_ESC2 are not referenced by
 * that table; presumably they are script sub-states entered while
 * tokenizing script content - confirm against the tokenizer code.
 */
#define DATA_RCDATA         1
#define DATA_RAWTEXT        2
#define DATA_PLAINTEXT      3
#define DATA_SCRIPT         4
#define DATA_SCRIPT_ESC1    5
#define DATA_SCRIPT_ESC2    6
508*7c568831SAndroid Build Coastguard Worker
509*7c568831SAndroid Build Coastguard Worker static const htmlElemDesc
510*7c568831SAndroid Build Coastguard Worker html40ElementTable[] = {
511*7c568831SAndroid Build Coastguard Worker { "a", 0, 0, 0, 0, 0, 0, 1, "anchor ",
512*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
513*7c568831SAndroid Build Coastguard Worker 0
514*7c568831SAndroid Build Coastguard Worker },
515*7c568831SAndroid Build Coastguard Worker { "abbr", 0, 0, 0, 0, 0, 0, 1, "abbreviated form",
516*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
517*7c568831SAndroid Build Coastguard Worker 0
518*7c568831SAndroid Build Coastguard Worker },
519*7c568831SAndroid Build Coastguard Worker { "acronym", 0, 0, 0, 0, 0, 0, 1, "",
520*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
521*7c568831SAndroid Build Coastguard Worker 0
522*7c568831SAndroid Build Coastguard Worker },
523*7c568831SAndroid Build Coastguard Worker { "address", 0, 0, 0, 0, 0, 0, 0, "information on author ",
524*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
525*7c568831SAndroid Build Coastguard Worker 0
526*7c568831SAndroid Build Coastguard Worker },
527*7c568831SAndroid Build Coastguard Worker { "applet", 0, 0, 0, 0, 1, 1, 2, "java applet ",
528*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
529*7c568831SAndroid Build Coastguard Worker 0
530*7c568831SAndroid Build Coastguard Worker },
531*7c568831SAndroid Build Coastguard Worker { "area", 0, 2, 2, 1, 0, 0, 0, "client-side image map area ",
532*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
533*7c568831SAndroid Build Coastguard Worker 0
534*7c568831SAndroid Build Coastguard Worker },
535*7c568831SAndroid Build Coastguard Worker { "b", 0, 3, 0, 0, 0, 0, 1, "bold text style",
536*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
537*7c568831SAndroid Build Coastguard Worker 0
538*7c568831SAndroid Build Coastguard Worker },
539*7c568831SAndroid Build Coastguard Worker { "base", 0, 2, 2, 1, 0, 0, 0, "document base uri ",
540*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
541*7c568831SAndroid Build Coastguard Worker 0
542*7c568831SAndroid Build Coastguard Worker },
543*7c568831SAndroid Build Coastguard Worker { "basefont", 0, 2, 2, 1, 1, 1, 1, "base font size " ,
544*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
545*7c568831SAndroid Build Coastguard Worker 0
546*7c568831SAndroid Build Coastguard Worker },
547*7c568831SAndroid Build Coastguard Worker { "bdo", 0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride ",
548*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
549*7c568831SAndroid Build Coastguard Worker 0
550*7c568831SAndroid Build Coastguard Worker },
551*7c568831SAndroid Build Coastguard Worker { "big", 0, 3, 0, 0, 0, 0, 1, "large text style",
552*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
553*7c568831SAndroid Build Coastguard Worker 0
554*7c568831SAndroid Build Coastguard Worker },
555*7c568831SAndroid Build Coastguard Worker { "blockquote", 0, 0, 0, 0, 0, 0, 0, "long quotation ",
556*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
557*7c568831SAndroid Build Coastguard Worker 0
558*7c568831SAndroid Build Coastguard Worker },
559*7c568831SAndroid Build Coastguard Worker { "body", 1, 1, 0, 0, 0, 0, 0, "document body ",
560*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
561*7c568831SAndroid Build Coastguard Worker 0
562*7c568831SAndroid Build Coastguard Worker },
563*7c568831SAndroid Build Coastguard Worker { "br", 0, 2, 2, 1, 0, 0, 1, "forced line break ",
564*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
565*7c568831SAndroid Build Coastguard Worker 0
566*7c568831SAndroid Build Coastguard Worker },
567*7c568831SAndroid Build Coastguard Worker { "button", 0, 0, 0, 0, 0, 0, 2, "push button ",
568*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
569*7c568831SAndroid Build Coastguard Worker 0
570*7c568831SAndroid Build Coastguard Worker },
571*7c568831SAndroid Build Coastguard Worker { "caption", 0, 0, 0, 0, 0, 0, 0, "table caption ",
572*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
573*7c568831SAndroid Build Coastguard Worker 0
574*7c568831SAndroid Build Coastguard Worker },
575*7c568831SAndroid Build Coastguard Worker { "center", 0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center ",
576*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
577*7c568831SAndroid Build Coastguard Worker 0
578*7c568831SAndroid Build Coastguard Worker },
579*7c568831SAndroid Build Coastguard Worker { "cite", 0, 0, 0, 0, 0, 0, 1, "citation",
580*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
581*7c568831SAndroid Build Coastguard Worker 0
582*7c568831SAndroid Build Coastguard Worker },
583*7c568831SAndroid Build Coastguard Worker { "code", 0, 0, 0, 0, 0, 0, 1, "computer code fragment",
584*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
585*7c568831SAndroid Build Coastguard Worker 0
586*7c568831SAndroid Build Coastguard Worker },
587*7c568831SAndroid Build Coastguard Worker { "col", 0, 2, 2, 1, 0, 0, 0, "table column ",
588*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
589*7c568831SAndroid Build Coastguard Worker 0
590*7c568831SAndroid Build Coastguard Worker },
591*7c568831SAndroid Build Coastguard Worker { "colgroup", 0, 1, 0, 0, 0, 0, 0, "table column group ",
592*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
593*7c568831SAndroid Build Coastguard Worker 0
594*7c568831SAndroid Build Coastguard Worker },
595*7c568831SAndroid Build Coastguard Worker { "dd", 0, 1, 0, 0, 0, 0, 0, "definition description ",
596*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
597*7c568831SAndroid Build Coastguard Worker 0
598*7c568831SAndroid Build Coastguard Worker },
599*7c568831SAndroid Build Coastguard Worker { "del", 0, 0, 0, 0, 0, 0, 2, "deleted text ",
600*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
601*7c568831SAndroid Build Coastguard Worker 0
602*7c568831SAndroid Build Coastguard Worker },
603*7c568831SAndroid Build Coastguard Worker { "dfn", 0, 0, 0, 0, 0, 0, 1, "instance definition",
604*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
605*7c568831SAndroid Build Coastguard Worker 0
606*7c568831SAndroid Build Coastguard Worker },
607*7c568831SAndroid Build Coastguard Worker { "dir", 0, 0, 0, 0, 1, 1, 0, "directory list",
608*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
609*7c568831SAndroid Build Coastguard Worker 0
610*7c568831SAndroid Build Coastguard Worker },
611*7c568831SAndroid Build Coastguard Worker { "div", 0, 0, 0, 0, 0, 0, 0, "generic language/style container",
612*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
613*7c568831SAndroid Build Coastguard Worker 0
614*7c568831SAndroid Build Coastguard Worker },
615*7c568831SAndroid Build Coastguard Worker { "dl", 0, 0, 0, 0, 0, 0, 0, "definition list ",
616*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
617*7c568831SAndroid Build Coastguard Worker 0
618*7c568831SAndroid Build Coastguard Worker },
619*7c568831SAndroid Build Coastguard Worker { "dt", 0, 1, 0, 0, 0, 0, 0, "definition term ",
620*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
621*7c568831SAndroid Build Coastguard Worker 0
622*7c568831SAndroid Build Coastguard Worker },
623*7c568831SAndroid Build Coastguard Worker { "em", 0, 3, 0, 0, 0, 0, 1, "emphasis",
624*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
625*7c568831SAndroid Build Coastguard Worker 0
626*7c568831SAndroid Build Coastguard Worker },
627*7c568831SAndroid Build Coastguard Worker { "embed", 0, 1, 0, 0, 1, 1, 1, "generic embedded object ",
628*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
629*7c568831SAndroid Build Coastguard Worker 0
630*7c568831SAndroid Build Coastguard Worker },
631*7c568831SAndroid Build Coastguard Worker { "fieldset", 0, 0, 0, 0, 0, 0, 0, "form control group ",
632*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
633*7c568831SAndroid Build Coastguard Worker 0
634*7c568831SAndroid Build Coastguard Worker },
635*7c568831SAndroid Build Coastguard Worker { "font", 0, 3, 0, 0, 1, 1, 1, "local change to font ",
636*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
637*7c568831SAndroid Build Coastguard Worker 0
638*7c568831SAndroid Build Coastguard Worker },
639*7c568831SAndroid Build Coastguard Worker { "form", 0, 0, 0, 0, 0, 0, 0, "interactive form ",
640*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
641*7c568831SAndroid Build Coastguard Worker 0
642*7c568831SAndroid Build Coastguard Worker },
643*7c568831SAndroid Build Coastguard Worker { "frame", 0, 2, 2, 1, 0, 2, 0, "subwindow " ,
644*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
645*7c568831SAndroid Build Coastguard Worker 0
646*7c568831SAndroid Build Coastguard Worker },
647*7c568831SAndroid Build Coastguard Worker { "frameset", 0, 0, 0, 0, 0, 2, 0, "window subdivision" ,
648*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
649*7c568831SAndroid Build Coastguard Worker 0
650*7c568831SAndroid Build Coastguard Worker },
651*7c568831SAndroid Build Coastguard Worker { "h1", 0, 0, 0, 0, 0, 0, 0, "heading ",
652*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
653*7c568831SAndroid Build Coastguard Worker 0
654*7c568831SAndroid Build Coastguard Worker },
655*7c568831SAndroid Build Coastguard Worker { "h2", 0, 0, 0, 0, 0, 0, 0, "heading ",
656*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
657*7c568831SAndroid Build Coastguard Worker 0
658*7c568831SAndroid Build Coastguard Worker },
659*7c568831SAndroid Build Coastguard Worker { "h3", 0, 0, 0, 0, 0, 0, 0, "heading ",
660*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
661*7c568831SAndroid Build Coastguard Worker 0
662*7c568831SAndroid Build Coastguard Worker },
663*7c568831SAndroid Build Coastguard Worker { "h4", 0, 0, 0, 0, 0, 0, 0, "heading ",
664*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
665*7c568831SAndroid Build Coastguard Worker 0
666*7c568831SAndroid Build Coastguard Worker },
667*7c568831SAndroid Build Coastguard Worker { "h5", 0, 0, 0, 0, 0, 0, 0, "heading ",
668*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
669*7c568831SAndroid Build Coastguard Worker 0
670*7c568831SAndroid Build Coastguard Worker },
671*7c568831SAndroid Build Coastguard Worker { "h6", 0, 0, 0, 0, 0, 0, 0, "heading ",
672*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
673*7c568831SAndroid Build Coastguard Worker 0
674*7c568831SAndroid Build Coastguard Worker },
675*7c568831SAndroid Build Coastguard Worker { "head", 1, 1, 0, 0, 0, 0, 0, "document head ",
676*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
677*7c568831SAndroid Build Coastguard Worker 0
678*7c568831SAndroid Build Coastguard Worker },
679*7c568831SAndroid Build Coastguard Worker { "hr", 0, 2, 2, 1, 0, 0, 0, "horizontal rule " ,
680*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
681*7c568831SAndroid Build Coastguard Worker 0
682*7c568831SAndroid Build Coastguard Worker },
683*7c568831SAndroid Build Coastguard Worker { "html", 1, 1, 0, 0, 0, 0, 0, "document root element ",
684*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
685*7c568831SAndroid Build Coastguard Worker 0
686*7c568831SAndroid Build Coastguard Worker },
687*7c568831SAndroid Build Coastguard Worker { "i", 0, 3, 0, 0, 0, 0, 1, "italic text style",
688*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
689*7c568831SAndroid Build Coastguard Worker 0
690*7c568831SAndroid Build Coastguard Worker },
691*7c568831SAndroid Build Coastguard Worker { "iframe", 0, 0, 0, 0, 0, 1, 2, "inline subwindow ",
692*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
693*7c568831SAndroid Build Coastguard Worker DATA_RAWTEXT
694*7c568831SAndroid Build Coastguard Worker },
695*7c568831SAndroid Build Coastguard Worker { "img", 0, 2, 2, 1, 0, 0, 1, "embedded image ",
696*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
697*7c568831SAndroid Build Coastguard Worker 0
698*7c568831SAndroid Build Coastguard Worker },
699*7c568831SAndroid Build Coastguard Worker { "input", 0, 2, 2, 1, 0, 0, 1, "form control ",
700*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
701*7c568831SAndroid Build Coastguard Worker 0
702*7c568831SAndroid Build Coastguard Worker },
703*7c568831SAndroid Build Coastguard Worker { "ins", 0, 0, 0, 0, 0, 0, 2, "inserted text",
704*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
705*7c568831SAndroid Build Coastguard Worker 0
706*7c568831SAndroid Build Coastguard Worker },
707*7c568831SAndroid Build Coastguard Worker { "isindex", 0, 2, 2, 1, 1, 1, 0, "single line prompt ",
708*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
709*7c568831SAndroid Build Coastguard Worker 0
710*7c568831SAndroid Build Coastguard Worker },
711*7c568831SAndroid Build Coastguard Worker { "kbd", 0, 0, 0, 0, 0, 0, 1, "text to be entered by the user",
712*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
713*7c568831SAndroid Build Coastguard Worker 0
714*7c568831SAndroid Build Coastguard Worker },
715*7c568831SAndroid Build Coastguard Worker { "label", 0, 0, 0, 0, 0, 0, 1, "form field label text ",
716*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
717*7c568831SAndroid Build Coastguard Worker 0
718*7c568831SAndroid Build Coastguard Worker },
719*7c568831SAndroid Build Coastguard Worker { "legend", 0, 0, 0, 0, 0, 0, 0, "fieldset legend ",
720*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
721*7c568831SAndroid Build Coastguard Worker 0
722*7c568831SAndroid Build Coastguard Worker },
723*7c568831SAndroid Build Coastguard Worker { "li", 0, 1, 1, 0, 0, 0, 0, "list item ",
724*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
725*7c568831SAndroid Build Coastguard Worker 0
726*7c568831SAndroid Build Coastguard Worker },
727*7c568831SAndroid Build Coastguard Worker { "link", 0, 2, 2, 1, 0, 0, 0, "a media-independent link ",
728*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
729*7c568831SAndroid Build Coastguard Worker 0
730*7c568831SAndroid Build Coastguard Worker },
731*7c568831SAndroid Build Coastguard Worker { "map", 0, 0, 0, 0, 0, 0, 2, "client-side image map ",
732*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
733*7c568831SAndroid Build Coastguard Worker 0
734*7c568831SAndroid Build Coastguard Worker },
735*7c568831SAndroid Build Coastguard Worker { "menu", 0, 0, 0, 0, 1, 1, 0, "menu list ",
736*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
737*7c568831SAndroid Build Coastguard Worker 0
738*7c568831SAndroid Build Coastguard Worker },
739*7c568831SAndroid Build Coastguard Worker { "meta", 0, 2, 2, 1, 0, 0, 0, "generic metainformation ",
740*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
741*7c568831SAndroid Build Coastguard Worker 0
742*7c568831SAndroid Build Coastguard Worker },
743*7c568831SAndroid Build Coastguard Worker { "noembed", 0, 0, 0, 0, 0, 0, 0, "",
744*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
745*7c568831SAndroid Build Coastguard Worker DATA_RAWTEXT
746*7c568831SAndroid Build Coastguard Worker },
747*7c568831SAndroid Build Coastguard Worker { "noframes", 0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ",
748*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
749*7c568831SAndroid Build Coastguard Worker DATA_RAWTEXT
750*7c568831SAndroid Build Coastguard Worker },
751*7c568831SAndroid Build Coastguard Worker { "noscript", 0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ",
752*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
753*7c568831SAndroid Build Coastguard Worker 0
754*7c568831SAndroid Build Coastguard Worker },
755*7c568831SAndroid Build Coastguard Worker { "object", 0, 0, 0, 0, 0, 0, 2, "generic embedded object ",
756*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
757*7c568831SAndroid Build Coastguard Worker 0
758*7c568831SAndroid Build Coastguard Worker },
759*7c568831SAndroid Build Coastguard Worker { "ol", 0, 0, 0, 0, 0, 0, 0, "ordered list ",
760*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
761*7c568831SAndroid Build Coastguard Worker 0
762*7c568831SAndroid Build Coastguard Worker },
763*7c568831SAndroid Build Coastguard Worker { "optgroup", 0, 0, 0, 0, 0, 0, 0, "option group ",
764*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
765*7c568831SAndroid Build Coastguard Worker 0
766*7c568831SAndroid Build Coastguard Worker },
767*7c568831SAndroid Build Coastguard Worker { "option", 0, 1, 0, 0, 0, 0, 0, "selectable choice " ,
768*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
769*7c568831SAndroid Build Coastguard Worker 0
770*7c568831SAndroid Build Coastguard Worker },
771*7c568831SAndroid Build Coastguard Worker { "p", 0, 1, 0, 0, 0, 0, 0, "paragraph ",
772*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
773*7c568831SAndroid Build Coastguard Worker 0
774*7c568831SAndroid Build Coastguard Worker },
775*7c568831SAndroid Build Coastguard Worker { "param", 0, 2, 2, 1, 0, 0, 0, "named property value ",
776*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
777*7c568831SAndroid Build Coastguard Worker 0
778*7c568831SAndroid Build Coastguard Worker },
779*7c568831SAndroid Build Coastguard Worker { "plaintext", 0, 0, 0, 0, 0, 0, 0, "",
780*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
781*7c568831SAndroid Build Coastguard Worker DATA_PLAINTEXT
782*7c568831SAndroid Build Coastguard Worker },
783*7c568831SAndroid Build Coastguard Worker { "pre", 0, 0, 0, 0, 0, 0, 0, "preformatted text ",
784*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
785*7c568831SAndroid Build Coastguard Worker 0
786*7c568831SAndroid Build Coastguard Worker },
787*7c568831SAndroid Build Coastguard Worker { "q", 0, 0, 0, 0, 0, 0, 1, "short inline quotation ",
788*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
789*7c568831SAndroid Build Coastguard Worker 0
790*7c568831SAndroid Build Coastguard Worker },
791*7c568831SAndroid Build Coastguard Worker { "s", 0, 3, 0, 0, 1, 1, 1, "strike-through text style",
792*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
793*7c568831SAndroid Build Coastguard Worker 0
794*7c568831SAndroid Build Coastguard Worker },
795*7c568831SAndroid Build Coastguard Worker { "samp", 0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.",
796*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
797*7c568831SAndroid Build Coastguard Worker 0
798*7c568831SAndroid Build Coastguard Worker },
799*7c568831SAndroid Build Coastguard Worker { "script", 0, 0, 0, 0, 0, 0, 2, "script statements ",
800*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
801*7c568831SAndroid Build Coastguard Worker DATA_SCRIPT
802*7c568831SAndroid Build Coastguard Worker },
803*7c568831SAndroid Build Coastguard Worker { "select", 0, 0, 0, 0, 0, 0, 1, "option selector ",
804*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
805*7c568831SAndroid Build Coastguard Worker 0
806*7c568831SAndroid Build Coastguard Worker },
807*7c568831SAndroid Build Coastguard Worker { "small", 0, 3, 0, 0, 0, 0, 1, "small text style",
808*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
809*7c568831SAndroid Build Coastguard Worker 0
810*7c568831SAndroid Build Coastguard Worker },
811*7c568831SAndroid Build Coastguard Worker { "span", 0, 0, 0, 0, 0, 0, 1, "generic language/style container ",
812*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
813*7c568831SAndroid Build Coastguard Worker 0
814*7c568831SAndroid Build Coastguard Worker },
815*7c568831SAndroid Build Coastguard Worker { "strike", 0, 3, 0, 0, 1, 1, 1, "strike-through text",
816*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
817*7c568831SAndroid Build Coastguard Worker 0
818*7c568831SAndroid Build Coastguard Worker },
819*7c568831SAndroid Build Coastguard Worker { "strong", 0, 3, 0, 0, 0, 0, 1, "strong emphasis",
820*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
821*7c568831SAndroid Build Coastguard Worker 0
822*7c568831SAndroid Build Coastguard Worker },
823*7c568831SAndroid Build Coastguard Worker { "style", 0, 0, 0, 0, 0, 0, 0, "style info ",
824*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
825*7c568831SAndroid Build Coastguard Worker DATA_RAWTEXT
826*7c568831SAndroid Build Coastguard Worker },
827*7c568831SAndroid Build Coastguard Worker { "sub", 0, 3, 0, 0, 0, 0, 1, "subscript",
828*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
829*7c568831SAndroid Build Coastguard Worker 0
830*7c568831SAndroid Build Coastguard Worker },
831*7c568831SAndroid Build Coastguard Worker { "sup", 0, 3, 0, 0, 0, 0, 1, "superscript ",
832*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
833*7c568831SAndroid Build Coastguard Worker 0
834*7c568831SAndroid Build Coastguard Worker },
835*7c568831SAndroid Build Coastguard Worker { "table", 0, 0, 0, 0, 0, 0, 0, "",
836*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
837*7c568831SAndroid Build Coastguard Worker 0
838*7c568831SAndroid Build Coastguard Worker },
839*7c568831SAndroid Build Coastguard Worker { "tbody", 1, 0, 0, 0, 0, 0, 0, "table body ",
840*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
841*7c568831SAndroid Build Coastguard Worker 0
842*7c568831SAndroid Build Coastguard Worker },
843*7c568831SAndroid Build Coastguard Worker { "td", 0, 0, 0, 0, 0, 0, 0, "table data cell",
844*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
845*7c568831SAndroid Build Coastguard Worker 0
846*7c568831SAndroid Build Coastguard Worker },
847*7c568831SAndroid Build Coastguard Worker { "textarea", 0, 0, 0, 0, 0, 0, 1, "multi-line text field ",
848*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
849*7c568831SAndroid Build Coastguard Worker DATA_RCDATA
850*7c568831SAndroid Build Coastguard Worker },
851*7c568831SAndroid Build Coastguard Worker { "tfoot", 0, 1, 0, 0, 0, 0, 0, "table footer ",
852*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
853*7c568831SAndroid Build Coastguard Worker 0
854*7c568831SAndroid Build Coastguard Worker },
855*7c568831SAndroid Build Coastguard Worker { "th", 0, 1, 0, 0, 0, 0, 0, "table header cell",
856*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
857*7c568831SAndroid Build Coastguard Worker 0
858*7c568831SAndroid Build Coastguard Worker },
859*7c568831SAndroid Build Coastguard Worker { "thead", 0, 1, 0, 0, 0, 0, 0, "table header ",
860*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
861*7c568831SAndroid Build Coastguard Worker 0
862*7c568831SAndroid Build Coastguard Worker },
863*7c568831SAndroid Build Coastguard Worker { "title", 0, 0, 0, 0, 0, 0, 0, "document title ",
864*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
865*7c568831SAndroid Build Coastguard Worker DATA_RCDATA
866*7c568831SAndroid Build Coastguard Worker },
867*7c568831SAndroid Build Coastguard Worker { "tr", 0, 0, 0, 0, 0, 0, 0, "table row ",
868*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
869*7c568831SAndroid Build Coastguard Worker 0
870*7c568831SAndroid Build Coastguard Worker },
871*7c568831SAndroid Build Coastguard Worker { "tt", 0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style",
872*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
873*7c568831SAndroid Build Coastguard Worker 0
874*7c568831SAndroid Build Coastguard Worker },
875*7c568831SAndroid Build Coastguard Worker { "u", 0, 3, 0, 0, 1, 1, 1, "underlined text style",
876*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
877*7c568831SAndroid Build Coastguard Worker 0
878*7c568831SAndroid Build Coastguard Worker },
879*7c568831SAndroid Build Coastguard Worker { "ul", 0, 0, 0, 0, 0, 0, 0, "unordered list ",
880*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
881*7c568831SAndroid Build Coastguard Worker 0
882*7c568831SAndroid Build Coastguard Worker },
883*7c568831SAndroid Build Coastguard Worker { "var", 0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument",
884*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
885*7c568831SAndroid Build Coastguard Worker 0
886*7c568831SAndroid Build Coastguard Worker },
887*7c568831SAndroid Build Coastguard Worker { "xmp", 0, 0, 0, 0, 0, 0, 1, "",
888*7c568831SAndroid Build Coastguard Worker NULL, NULL, NULL, NULL, NULL,
889*7c568831SAndroid Build Coastguard Worker DATA_RAWTEXT
890*7c568831SAndroid Build Coastguard Worker }
891*7c568831SAndroid Build Coastguard Worker };
892*7c568831SAndroid Build Coastguard Worker
/*
 * A pair of element names; the entry type of the htmlStartClose table.
 *
 * NOTE(review): that table is described as "start tags that imply the end
 * of current element", so oldTag is presumably the element currently open
 * and newTag the start tag that implicitly closes it - confirm against the
 * code that searches the table.
 */
typedef struct {
    const char *oldTag;
    const char *newTag;
} htmlStartCloseEntry;
897*7c568831SAndroid Build Coastguard Worker
898*7c568831SAndroid Build Coastguard Worker /*
899*7c568831SAndroid Build Coastguard Worker * start tags that imply the end of current element
900*7c568831SAndroid Build Coastguard Worker */
901*7c568831SAndroid Build Coastguard Worker static const htmlStartCloseEntry htmlStartClose[] = {
902*7c568831SAndroid Build Coastguard Worker { "a", "a" },
903*7c568831SAndroid Build Coastguard Worker { "a", "fieldset" },
904*7c568831SAndroid Build Coastguard Worker { "a", "table" },
905*7c568831SAndroid Build Coastguard Worker { "a", "td" },
906*7c568831SAndroid Build Coastguard Worker { "a", "th" },
907*7c568831SAndroid Build Coastguard Worker { "address", "dd" },
908*7c568831SAndroid Build Coastguard Worker { "address", "dl" },
909*7c568831SAndroid Build Coastguard Worker { "address", "dt" },
910*7c568831SAndroid Build Coastguard Worker { "address", "form" },
911*7c568831SAndroid Build Coastguard Worker { "address", "li" },
912*7c568831SAndroid Build Coastguard Worker { "address", "ul" },
913*7c568831SAndroid Build Coastguard Worker { "b", "center" },
914*7c568831SAndroid Build Coastguard Worker { "b", "p" },
915*7c568831SAndroid Build Coastguard Worker { "b", "td" },
916*7c568831SAndroid Build Coastguard Worker { "b", "th" },
917*7c568831SAndroid Build Coastguard Worker { "big", "p" },
918*7c568831SAndroid Build Coastguard Worker { "caption", "col" },
919*7c568831SAndroid Build Coastguard Worker { "caption", "colgroup" },
920*7c568831SAndroid Build Coastguard Worker { "caption", "tbody" },
921*7c568831SAndroid Build Coastguard Worker { "caption", "tfoot" },
922*7c568831SAndroid Build Coastguard Worker { "caption", "thead" },
923*7c568831SAndroid Build Coastguard Worker { "caption", "tr" },
924*7c568831SAndroid Build Coastguard Worker { "col", "col" },
925*7c568831SAndroid Build Coastguard Worker { "col", "colgroup" },
926*7c568831SAndroid Build Coastguard Worker { "col", "tbody" },
927*7c568831SAndroid Build Coastguard Worker { "col", "tfoot" },
928*7c568831SAndroid Build Coastguard Worker { "col", "thead" },
929*7c568831SAndroid Build Coastguard Worker { "col", "tr" },
930*7c568831SAndroid Build Coastguard Worker { "colgroup", "colgroup" },
931*7c568831SAndroid Build Coastguard Worker { "colgroup", "tbody" },
932*7c568831SAndroid Build Coastguard Worker { "colgroup", "tfoot" },
933*7c568831SAndroid Build Coastguard Worker { "colgroup", "thead" },
934*7c568831SAndroid Build Coastguard Worker { "colgroup", "tr" },
935*7c568831SAndroid Build Coastguard Worker { "dd", "dt" },
936*7c568831SAndroid Build Coastguard Worker { "dir", "dd" },
937*7c568831SAndroid Build Coastguard Worker { "dir", "dl" },
938*7c568831SAndroid Build Coastguard Worker { "dir", "dt" },
939*7c568831SAndroid Build Coastguard Worker { "dir", "form" },
940*7c568831SAndroid Build Coastguard Worker { "dir", "ul" },
941*7c568831SAndroid Build Coastguard Worker { "dl", "form" },
942*7c568831SAndroid Build Coastguard Worker { "dl", "li" },
943*7c568831SAndroid Build Coastguard Worker { "dt", "dd" },
944*7c568831SAndroid Build Coastguard Worker { "dt", "dl" },
945*7c568831SAndroid Build Coastguard Worker { "font", "center" },
946*7c568831SAndroid Build Coastguard Worker { "font", "td" },
947*7c568831SAndroid Build Coastguard Worker { "font", "th" },
948*7c568831SAndroid Build Coastguard Worker { "form", "form" },
949*7c568831SAndroid Build Coastguard Worker { "h1", "fieldset" },
950*7c568831SAndroid Build Coastguard Worker { "h1", "form" },
951*7c568831SAndroid Build Coastguard Worker { "h1", "li" },
952*7c568831SAndroid Build Coastguard Worker { "h1", "p" },
953*7c568831SAndroid Build Coastguard Worker { "h1", "table" },
954*7c568831SAndroid Build Coastguard Worker { "h2", "fieldset" },
955*7c568831SAndroid Build Coastguard Worker { "h2", "form" },
956*7c568831SAndroid Build Coastguard Worker { "h2", "li" },
957*7c568831SAndroid Build Coastguard Worker { "h2", "p" },
958*7c568831SAndroid Build Coastguard Worker { "h2", "table" },
959*7c568831SAndroid Build Coastguard Worker { "h3", "fieldset" },
960*7c568831SAndroid Build Coastguard Worker { "h3", "form" },
961*7c568831SAndroid Build Coastguard Worker { "h3", "li" },
962*7c568831SAndroid Build Coastguard Worker { "h3", "p" },
963*7c568831SAndroid Build Coastguard Worker { "h3", "table" },
964*7c568831SAndroid Build Coastguard Worker { "h4", "fieldset" },
965*7c568831SAndroid Build Coastguard Worker { "h4", "form" },
966*7c568831SAndroid Build Coastguard Worker { "h4", "li" },
967*7c568831SAndroid Build Coastguard Worker { "h4", "p" },
968*7c568831SAndroid Build Coastguard Worker { "h4", "table" },
969*7c568831SAndroid Build Coastguard Worker { "h5", "fieldset" },
970*7c568831SAndroid Build Coastguard Worker { "h5", "form" },
971*7c568831SAndroid Build Coastguard Worker { "h5", "li" },
972*7c568831SAndroid Build Coastguard Worker { "h5", "p" },
973*7c568831SAndroid Build Coastguard Worker { "h5", "table" },
974*7c568831SAndroid Build Coastguard Worker { "h6", "fieldset" },
975*7c568831SAndroid Build Coastguard Worker { "h6", "form" },
976*7c568831SAndroid Build Coastguard Worker { "h6", "li" },
977*7c568831SAndroid Build Coastguard Worker { "h6", "p" },
978*7c568831SAndroid Build Coastguard Worker { "h6", "table" },
979*7c568831SAndroid Build Coastguard Worker { "head", "a" },
980*7c568831SAndroid Build Coastguard Worker { "head", "abbr" },
981*7c568831SAndroid Build Coastguard Worker { "head", "acronym" },
982*7c568831SAndroid Build Coastguard Worker { "head", "address" },
983*7c568831SAndroid Build Coastguard Worker { "head", "b" },
984*7c568831SAndroid Build Coastguard Worker { "head", "bdo" },
985*7c568831SAndroid Build Coastguard Worker { "head", "big" },
986*7c568831SAndroid Build Coastguard Worker { "head", "blockquote" },
987*7c568831SAndroid Build Coastguard Worker { "head", "body" },
988*7c568831SAndroid Build Coastguard Worker { "head", "br" },
989*7c568831SAndroid Build Coastguard Worker { "head", "center" },
990*7c568831SAndroid Build Coastguard Worker { "head", "cite" },
991*7c568831SAndroid Build Coastguard Worker { "head", "code" },
992*7c568831SAndroid Build Coastguard Worker { "head", "dd" },
993*7c568831SAndroid Build Coastguard Worker { "head", "dfn" },
994*7c568831SAndroid Build Coastguard Worker { "head", "dir" },
995*7c568831SAndroid Build Coastguard Worker { "head", "div" },
996*7c568831SAndroid Build Coastguard Worker { "head", "dl" },
997*7c568831SAndroid Build Coastguard Worker { "head", "dt" },
998*7c568831SAndroid Build Coastguard Worker { "head", "em" },
999*7c568831SAndroid Build Coastguard Worker { "head", "fieldset" },
1000*7c568831SAndroid Build Coastguard Worker { "head", "font" },
1001*7c568831SAndroid Build Coastguard Worker { "head", "form" },
1002*7c568831SAndroid Build Coastguard Worker { "head", "frameset" },
1003*7c568831SAndroid Build Coastguard Worker { "head", "h1" },
1004*7c568831SAndroid Build Coastguard Worker { "head", "h2" },
1005*7c568831SAndroid Build Coastguard Worker { "head", "h3" },
1006*7c568831SAndroid Build Coastguard Worker { "head", "h4" },
1007*7c568831SAndroid Build Coastguard Worker { "head", "h5" },
1008*7c568831SAndroid Build Coastguard Worker { "head", "h6" },
1009*7c568831SAndroid Build Coastguard Worker { "head", "hr" },
1010*7c568831SAndroid Build Coastguard Worker { "head", "i" },
1011*7c568831SAndroid Build Coastguard Worker { "head", "iframe" },
1012*7c568831SAndroid Build Coastguard Worker { "head", "img" },
1013*7c568831SAndroid Build Coastguard Worker { "head", "kbd" },
1014*7c568831SAndroid Build Coastguard Worker { "head", "li" },
1015*7c568831SAndroid Build Coastguard Worker { "head", "listing" },
1016*7c568831SAndroid Build Coastguard Worker { "head", "map" },
1017*7c568831SAndroid Build Coastguard Worker { "head", "menu" },
1018*7c568831SAndroid Build Coastguard Worker { "head", "ol" },
1019*7c568831SAndroid Build Coastguard Worker { "head", "p" },
1020*7c568831SAndroid Build Coastguard Worker { "head", "pre" },
1021*7c568831SAndroid Build Coastguard Worker { "head", "q" },
1022*7c568831SAndroid Build Coastguard Worker { "head", "s" },
1023*7c568831SAndroid Build Coastguard Worker { "head", "samp" },
1024*7c568831SAndroid Build Coastguard Worker { "head", "small" },
1025*7c568831SAndroid Build Coastguard Worker { "head", "span" },
1026*7c568831SAndroid Build Coastguard Worker { "head", "strike" },
1027*7c568831SAndroid Build Coastguard Worker { "head", "strong" },
1028*7c568831SAndroid Build Coastguard Worker { "head", "sub" },
1029*7c568831SAndroid Build Coastguard Worker { "head", "sup" },
1030*7c568831SAndroid Build Coastguard Worker { "head", "table" },
1031*7c568831SAndroid Build Coastguard Worker { "head", "tt" },
1032*7c568831SAndroid Build Coastguard Worker { "head", "u" },
1033*7c568831SAndroid Build Coastguard Worker { "head", "ul" },
1034*7c568831SAndroid Build Coastguard Worker { "head", "var" },
1035*7c568831SAndroid Build Coastguard Worker { "head", "xmp" },
1036*7c568831SAndroid Build Coastguard Worker { "hr", "form" },
1037*7c568831SAndroid Build Coastguard Worker { "i", "center" },
1038*7c568831SAndroid Build Coastguard Worker { "i", "p" },
1039*7c568831SAndroid Build Coastguard Worker { "i", "td" },
1040*7c568831SAndroid Build Coastguard Worker { "i", "th" },
1041*7c568831SAndroid Build Coastguard Worker { "legend", "fieldset" },
1042*7c568831SAndroid Build Coastguard Worker { "li", "li" },
1043*7c568831SAndroid Build Coastguard Worker { "link", "body" },
1044*7c568831SAndroid Build Coastguard Worker { "link", "frameset" },
1045*7c568831SAndroid Build Coastguard Worker { "listing", "dd" },
1046*7c568831SAndroid Build Coastguard Worker { "listing", "dl" },
1047*7c568831SAndroid Build Coastguard Worker { "listing", "dt" },
1048*7c568831SAndroid Build Coastguard Worker { "listing", "fieldset" },
1049*7c568831SAndroid Build Coastguard Worker { "listing", "form" },
1050*7c568831SAndroid Build Coastguard Worker { "listing", "li" },
1051*7c568831SAndroid Build Coastguard Worker { "listing", "table" },
1052*7c568831SAndroid Build Coastguard Worker { "listing", "ul" },
1053*7c568831SAndroid Build Coastguard Worker { "menu", "dd" },
1054*7c568831SAndroid Build Coastguard Worker { "menu", "dl" },
1055*7c568831SAndroid Build Coastguard Worker { "menu", "dt" },
1056*7c568831SAndroid Build Coastguard Worker { "menu", "form" },
1057*7c568831SAndroid Build Coastguard Worker { "menu", "ul" },
1058*7c568831SAndroid Build Coastguard Worker { "ol", "form" },
1059*7c568831SAndroid Build Coastguard Worker { "option", "optgroup" },
1060*7c568831SAndroid Build Coastguard Worker { "option", "option" },
1061*7c568831SAndroid Build Coastguard Worker { "p", "address" },
1062*7c568831SAndroid Build Coastguard Worker { "p", "blockquote" },
1063*7c568831SAndroid Build Coastguard Worker { "p", "body" },
1064*7c568831SAndroid Build Coastguard Worker { "p", "caption" },
1065*7c568831SAndroid Build Coastguard Worker { "p", "center" },
1066*7c568831SAndroid Build Coastguard Worker { "p", "col" },
1067*7c568831SAndroid Build Coastguard Worker { "p", "colgroup" },
1068*7c568831SAndroid Build Coastguard Worker { "p", "dd" },
1069*7c568831SAndroid Build Coastguard Worker { "p", "dir" },
1070*7c568831SAndroid Build Coastguard Worker { "p", "div" },
1071*7c568831SAndroid Build Coastguard Worker { "p", "dl" },
1072*7c568831SAndroid Build Coastguard Worker { "p", "dt" },
1073*7c568831SAndroid Build Coastguard Worker { "p", "fieldset" },
1074*7c568831SAndroid Build Coastguard Worker { "p", "form" },
1075*7c568831SAndroid Build Coastguard Worker { "p", "frameset" },
1076*7c568831SAndroid Build Coastguard Worker { "p", "h1" },
1077*7c568831SAndroid Build Coastguard Worker { "p", "h2" },
1078*7c568831SAndroid Build Coastguard Worker { "p", "h3" },
1079*7c568831SAndroid Build Coastguard Worker { "p", "h4" },
1080*7c568831SAndroid Build Coastguard Worker { "p", "h5" },
1081*7c568831SAndroid Build Coastguard Worker { "p", "h6" },
1082*7c568831SAndroid Build Coastguard Worker { "p", "head" },
1083*7c568831SAndroid Build Coastguard Worker { "p", "hr" },
1084*7c568831SAndroid Build Coastguard Worker { "p", "li" },
1085*7c568831SAndroid Build Coastguard Worker { "p", "listing" },
1086*7c568831SAndroid Build Coastguard Worker { "p", "menu" },
1087*7c568831SAndroid Build Coastguard Worker { "p", "ol" },
1088*7c568831SAndroid Build Coastguard Worker { "p", "p" },
1089*7c568831SAndroid Build Coastguard Worker { "p", "pre" },
1090*7c568831SAndroid Build Coastguard Worker { "p", "table" },
1091*7c568831SAndroid Build Coastguard Worker { "p", "tbody" },
1092*7c568831SAndroid Build Coastguard Worker { "p", "td" },
1093*7c568831SAndroid Build Coastguard Worker { "p", "tfoot" },
1094*7c568831SAndroid Build Coastguard Worker { "p", "th" },
1095*7c568831SAndroid Build Coastguard Worker { "p", "title" },
1096*7c568831SAndroid Build Coastguard Worker { "p", "tr" },
1097*7c568831SAndroid Build Coastguard Worker { "p", "ul" },
1098*7c568831SAndroid Build Coastguard Worker { "p", "xmp" },
1099*7c568831SAndroid Build Coastguard Worker { "pre", "dd" },
1100*7c568831SAndroid Build Coastguard Worker { "pre", "dl" },
1101*7c568831SAndroid Build Coastguard Worker { "pre", "dt" },
1102*7c568831SAndroid Build Coastguard Worker { "pre", "fieldset" },
1103*7c568831SAndroid Build Coastguard Worker { "pre", "form" },
1104*7c568831SAndroid Build Coastguard Worker { "pre", "li" },
1105*7c568831SAndroid Build Coastguard Worker { "pre", "table" },
1106*7c568831SAndroid Build Coastguard Worker { "pre", "ul" },
1107*7c568831SAndroid Build Coastguard Worker { "s", "p" },
1108*7c568831SAndroid Build Coastguard Worker { "script", "noscript" },
1109*7c568831SAndroid Build Coastguard Worker { "small", "p" },
1110*7c568831SAndroid Build Coastguard Worker { "span", "td" },
1111*7c568831SAndroid Build Coastguard Worker { "span", "th" },
1112*7c568831SAndroid Build Coastguard Worker { "strike", "p" },
1113*7c568831SAndroid Build Coastguard Worker { "style", "body" },
1114*7c568831SAndroid Build Coastguard Worker { "style", "frameset" },
1115*7c568831SAndroid Build Coastguard Worker { "tbody", "tbody" },
1116*7c568831SAndroid Build Coastguard Worker { "tbody", "tfoot" },
1117*7c568831SAndroid Build Coastguard Worker { "td", "tbody" },
1118*7c568831SAndroid Build Coastguard Worker { "td", "td" },
1119*7c568831SAndroid Build Coastguard Worker { "td", "tfoot" },
1120*7c568831SAndroid Build Coastguard Worker { "td", "th" },
1121*7c568831SAndroid Build Coastguard Worker { "td", "tr" },
1122*7c568831SAndroid Build Coastguard Worker { "tfoot", "tbody" },
1123*7c568831SAndroid Build Coastguard Worker { "th", "tbody" },
1124*7c568831SAndroid Build Coastguard Worker { "th", "td" },
1125*7c568831SAndroid Build Coastguard Worker { "th", "tfoot" },
1126*7c568831SAndroid Build Coastguard Worker { "th", "th" },
1127*7c568831SAndroid Build Coastguard Worker { "th", "tr" },
1128*7c568831SAndroid Build Coastguard Worker { "thead", "tbody" },
1129*7c568831SAndroid Build Coastguard Worker { "thead", "tfoot" },
1130*7c568831SAndroid Build Coastguard Worker { "title", "body" },
1131*7c568831SAndroid Build Coastguard Worker { "title", "frameset" },
1132*7c568831SAndroid Build Coastguard Worker { "tr", "tbody" },
1133*7c568831SAndroid Build Coastguard Worker { "tr", "tfoot" },
1134*7c568831SAndroid Build Coastguard Worker { "tr", "tr" },
1135*7c568831SAndroid Build Coastguard Worker { "tt", "p" },
1136*7c568831SAndroid Build Coastguard Worker { "u", "p" },
1137*7c568831SAndroid Build Coastguard Worker { "u", "td" },
1138*7c568831SAndroid Build Coastguard Worker { "u", "th" },
1139*7c568831SAndroid Build Coastguard Worker { "ul", "address" },
1140*7c568831SAndroid Build Coastguard Worker { "ul", "form" },
1141*7c568831SAndroid Build Coastguard Worker { "ul", "menu" },
1142*7c568831SAndroid Build Coastguard Worker { "ul", "pre" },
1143*7c568831SAndroid Build Coastguard Worker { "xmp", "dd" },
1144*7c568831SAndroid Build Coastguard Worker { "xmp", "dl" },
1145*7c568831SAndroid Build Coastguard Worker { "xmp", "dt" },
1146*7c568831SAndroid Build Coastguard Worker { "xmp", "fieldset" },
1147*7c568831SAndroid Build Coastguard Worker { "xmp", "form" },
1148*7c568831SAndroid Build Coastguard Worker { "xmp", "li" },
1149*7c568831SAndroid Build Coastguard Worker { "xmp", "table" },
1150*7c568831SAndroid Build Coastguard Worker { "xmp", "ul" }
1151*7c568831SAndroid Build Coastguard Worker };
1152*7c568831SAndroid Build Coastguard Worker
/*
 * NULL-terminated list of HTML elements which are not supposed to
 * hold CDATA content directly and for which a p element will be
 * implied.
 *
 * TODO: extend that list by reading the HTML SGML DTD on
 *       implied paragraph
 */
static const char *const htmlNoContentElements[] = { "html", "head", NULL };
1165*7c568831SAndroid Build Coastguard Worker
/*
 * Names of the HTML attributes whose content is of type %Script;
 * NOTE: every entry must start with 'on'; htmlIsScriptAttribute()
 *       relies on that, so check it when adding new names.
 */
static const char *const htmlScriptAttributes[] = {
    "onclick", "ondblclick",
    "onmousedown", "onmouseup", "onmouseover", "onmousemove", "onmouseout",
    "onkeypress", "onkeydown", "onkeyup",
    "onload", "onunload",
    "onfocus", "onblur",
    "onsubmit", "onreset",
    "onchange", "onselect"
};
1191*7c568831SAndroid Build Coastguard Worker
/*
 * End tag priorities, used by the HTML parser to recover from broken
 * pages. Each listed element gets a priority; an extra end tag is only
 * allowed to close open elements whose priority is lower than or equal
 * to its own. The final entry has a NULL name and holds the default
 * priority.
 */

typedef struct {
    const char *name;
    int priority;
} elementPriority;

static const elementPriority htmlEndPriority[] = {
    { "div",   150 },
    { "td",    160 },
    { "th",    160 },
    { "tr",    170 },
    { "thead", 180 },
    { "tbody", 180 },
    { "tfoot", 180 },
    { "table", 190 },
    { "head",  200 },
    { "body",  200 },
    { "html",  220 },
    { NULL,    100 }    /* Default priority */
};
1219*7c568831SAndroid Build Coastguard Worker
1220*7c568831SAndroid Build Coastguard Worker /************************************************************************
1221*7c568831SAndroid Build Coastguard Worker * *
1222*7c568831SAndroid Build Coastguard Worker * functions to handle HTML specific data *
1223*7c568831SAndroid Build Coastguard Worker * *
1224*7c568831SAndroid Build Coastguard Worker ************************************************************************/
1225*7c568831SAndroid Build Coastguard Worker
1226*7c568831SAndroid Build Coastguard Worker static void
htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt)1227*7c568831SAndroid Build Coastguard Worker htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
1228*7c568831SAndroid Build Coastguard Worker /*
1229*7c568831SAndroid Build Coastguard Worker * Capture end position and add node
1230*7c568831SAndroid Build Coastguard Worker */
1231*7c568831SAndroid Build Coastguard Worker if ( ctxt->node != NULL && ctxt->record_info ) {
1232*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfo->end_pos = ctxt->input->consumed +
1233*7c568831SAndroid Build Coastguard Worker (CUR_PTR - ctxt->input->base);
1234*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfo->end_line = ctxt->input->line;
1235*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfo->node = ctxt->node;
1236*7c568831SAndroid Build Coastguard Worker xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
1237*7c568831SAndroid Build Coastguard Worker htmlNodeInfoPop(ctxt);
1238*7c568831SAndroid Build Coastguard Worker }
1239*7c568831SAndroid Build Coastguard Worker }
1240*7c568831SAndroid Build Coastguard Worker
/**
 * htmlInitAutoClose:
 *
 * DEPRECATED: This is a no-op.
 */
void
htmlInitAutoClose(void)
{
    /* Intentionally empty */
}
1249*7c568831SAndroid Build Coastguard Worker
1250*7c568831SAndroid Build Coastguard Worker static int
htmlCompareTags(const void * key,const void * member)1251*7c568831SAndroid Build Coastguard Worker htmlCompareTags(const void *key, const void *member) {
1252*7c568831SAndroid Build Coastguard Worker const xmlChar *tag = (const xmlChar *) key;
1253*7c568831SAndroid Build Coastguard Worker const htmlElemDesc *desc = (const htmlElemDesc *) member;
1254*7c568831SAndroid Build Coastguard Worker
1255*7c568831SAndroid Build Coastguard Worker return(xmlStrcasecmp(tag, BAD_CAST desc->name));
1256*7c568831SAndroid Build Coastguard Worker }
1257*7c568831SAndroid Build Coastguard Worker
1258*7c568831SAndroid Build Coastguard Worker /**
1259*7c568831SAndroid Build Coastguard Worker * htmlTagLookup:
1260*7c568831SAndroid Build Coastguard Worker * @tag: The tag name in lowercase
1261*7c568831SAndroid Build Coastguard Worker *
1262*7c568831SAndroid Build Coastguard Worker * Lookup the HTML tag in the ElementTable
1263*7c568831SAndroid Build Coastguard Worker *
1264*7c568831SAndroid Build Coastguard Worker * Returns the related htmlElemDescPtr or NULL if not found.
1265*7c568831SAndroid Build Coastguard Worker */
1266*7c568831SAndroid Build Coastguard Worker const htmlElemDesc *
htmlTagLookup(const xmlChar * tag)1267*7c568831SAndroid Build Coastguard Worker htmlTagLookup(const xmlChar *tag) {
1268*7c568831SAndroid Build Coastguard Worker if (tag == NULL)
1269*7c568831SAndroid Build Coastguard Worker return(NULL);
1270*7c568831SAndroid Build Coastguard Worker
1271*7c568831SAndroid Build Coastguard Worker return((const htmlElemDesc *) bsearch(tag, html40ElementTable,
1272*7c568831SAndroid Build Coastguard Worker sizeof(html40ElementTable) / sizeof(htmlElemDesc),
1273*7c568831SAndroid Build Coastguard Worker sizeof(htmlElemDesc), htmlCompareTags));
1274*7c568831SAndroid Build Coastguard Worker }
1275*7c568831SAndroid Build Coastguard Worker
1276*7c568831SAndroid Build Coastguard Worker /**
1277*7c568831SAndroid Build Coastguard Worker * htmlGetEndPriority:
1278*7c568831SAndroid Build Coastguard Worker * @name: The name of the element to look up the priority for.
1279*7c568831SAndroid Build Coastguard Worker *
1280*7c568831SAndroid Build Coastguard Worker * Return value: The "endtag" priority.
1281*7c568831SAndroid Build Coastguard Worker **/
1282*7c568831SAndroid Build Coastguard Worker static int
htmlGetEndPriority(const xmlChar * name)1283*7c568831SAndroid Build Coastguard Worker htmlGetEndPriority (const xmlChar *name) {
1284*7c568831SAndroid Build Coastguard Worker int i = 0;
1285*7c568831SAndroid Build Coastguard Worker
1286*7c568831SAndroid Build Coastguard Worker while ((htmlEndPriority[i].name != NULL) &&
1287*7c568831SAndroid Build Coastguard Worker (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
1288*7c568831SAndroid Build Coastguard Worker i++;
1289*7c568831SAndroid Build Coastguard Worker
1290*7c568831SAndroid Build Coastguard Worker return(htmlEndPriority[i].priority);
1291*7c568831SAndroid Build Coastguard Worker }
1292*7c568831SAndroid Build Coastguard Worker
1293*7c568831SAndroid Build Coastguard Worker
1294*7c568831SAndroid Build Coastguard Worker static int
htmlCompareStartClose(const void * vkey,const void * member)1295*7c568831SAndroid Build Coastguard Worker htmlCompareStartClose(const void *vkey, const void *member) {
1296*7c568831SAndroid Build Coastguard Worker const htmlStartCloseEntry *key = (const htmlStartCloseEntry *) vkey;
1297*7c568831SAndroid Build Coastguard Worker const htmlStartCloseEntry *entry = (const htmlStartCloseEntry *) member;
1298*7c568831SAndroid Build Coastguard Worker int ret;
1299*7c568831SAndroid Build Coastguard Worker
1300*7c568831SAndroid Build Coastguard Worker ret = strcmp(key->oldTag, entry->oldTag);
1301*7c568831SAndroid Build Coastguard Worker if (ret == 0)
1302*7c568831SAndroid Build Coastguard Worker ret = strcmp(key->newTag, entry->newTag);
1303*7c568831SAndroid Build Coastguard Worker
1304*7c568831SAndroid Build Coastguard Worker return(ret);
1305*7c568831SAndroid Build Coastguard Worker }
1306*7c568831SAndroid Build Coastguard Worker
1307*7c568831SAndroid Build Coastguard Worker /**
1308*7c568831SAndroid Build Coastguard Worker * htmlCheckAutoClose:
1309*7c568831SAndroid Build Coastguard Worker * @newtag: The new tag name
1310*7c568831SAndroid Build Coastguard Worker * @oldtag: The old tag name
1311*7c568831SAndroid Build Coastguard Worker *
1312*7c568831SAndroid Build Coastguard Worker * Checks whether the new tag is one of the registered valid tags for
1313*7c568831SAndroid Build Coastguard Worker * closing old.
1314*7c568831SAndroid Build Coastguard Worker *
1315*7c568831SAndroid Build Coastguard Worker * Returns 0 if no, 1 if yes.
1316*7c568831SAndroid Build Coastguard Worker */
1317*7c568831SAndroid Build Coastguard Worker static int
htmlCheckAutoClose(const xmlChar * newtag,const xmlChar * oldtag)1318*7c568831SAndroid Build Coastguard Worker htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag)
1319*7c568831SAndroid Build Coastguard Worker {
1320*7c568831SAndroid Build Coastguard Worker htmlStartCloseEntry key;
1321*7c568831SAndroid Build Coastguard Worker void *res;
1322*7c568831SAndroid Build Coastguard Worker
1323*7c568831SAndroid Build Coastguard Worker key.oldTag = (const char *) oldtag;
1324*7c568831SAndroid Build Coastguard Worker key.newTag = (const char *) newtag;
1325*7c568831SAndroid Build Coastguard Worker res = bsearch(&key, htmlStartClose,
1326*7c568831SAndroid Build Coastguard Worker sizeof(htmlStartClose) / sizeof(htmlStartCloseEntry),
1327*7c568831SAndroid Build Coastguard Worker sizeof(htmlStartCloseEntry), htmlCompareStartClose);
1328*7c568831SAndroid Build Coastguard Worker return(res != NULL);
1329*7c568831SAndroid Build Coastguard Worker }
1330*7c568831SAndroid Build Coastguard Worker
1331*7c568831SAndroid Build Coastguard Worker /**
1332*7c568831SAndroid Build Coastguard Worker * htmlAutoCloseOnClose:
1333*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
1334*7c568831SAndroid Build Coastguard Worker * @newtag: The new tag name
1335*7c568831SAndroid Build Coastguard Worker * @force: force the tag closure
1336*7c568831SAndroid Build Coastguard Worker *
1337*7c568831SAndroid Build Coastguard Worker * The HTML DTD allows an ending tag to implicitly close other tags.
1338*7c568831SAndroid Build Coastguard Worker */
1339*7c568831SAndroid Build Coastguard Worker static void
htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt,const xmlChar * newtag)1340*7c568831SAndroid Build Coastguard Worker htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1341*7c568831SAndroid Build Coastguard Worker {
1342*7c568831SAndroid Build Coastguard Worker const htmlElemDesc *info;
1343*7c568831SAndroid Build Coastguard Worker int i, priority;
1344*7c568831SAndroid Build Coastguard Worker
1345*7c568831SAndroid Build Coastguard Worker if (ctxt->options & HTML_PARSE_HTML5)
1346*7c568831SAndroid Build Coastguard Worker return;
1347*7c568831SAndroid Build Coastguard Worker
1348*7c568831SAndroid Build Coastguard Worker priority = htmlGetEndPriority(newtag);
1349*7c568831SAndroid Build Coastguard Worker
1350*7c568831SAndroid Build Coastguard Worker for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1351*7c568831SAndroid Build Coastguard Worker
1352*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(newtag, ctxt->nameTab[i]))
1353*7c568831SAndroid Build Coastguard Worker break;
1354*7c568831SAndroid Build Coastguard Worker /*
1355*7c568831SAndroid Build Coastguard Worker * A misplaced endtag can only close elements with lower
1356*7c568831SAndroid Build Coastguard Worker * or equal priority, so if we find an element with higher
1357*7c568831SAndroid Build Coastguard Worker * priority before we find an element with
1358*7c568831SAndroid Build Coastguard Worker * matching name, we just ignore this endtag
1359*7c568831SAndroid Build Coastguard Worker */
1360*7c568831SAndroid Build Coastguard Worker if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
1361*7c568831SAndroid Build Coastguard Worker return;
1362*7c568831SAndroid Build Coastguard Worker }
1363*7c568831SAndroid Build Coastguard Worker if (i < 0)
1364*7c568831SAndroid Build Coastguard Worker return;
1365*7c568831SAndroid Build Coastguard Worker
1366*7c568831SAndroid Build Coastguard Worker while (!xmlStrEqual(newtag, ctxt->name)) {
1367*7c568831SAndroid Build Coastguard Worker info = htmlTagLookup(ctxt->name);
1368*7c568831SAndroid Build Coastguard Worker if ((info != NULL) && (info->endTag == 3)) {
1369*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
1370*7c568831SAndroid Build Coastguard Worker "Opening and ending tag mismatch: %s and %s\n",
1371*7c568831SAndroid Build Coastguard Worker newtag, ctxt->name);
1372*7c568831SAndroid Build Coastguard Worker }
1373*7c568831SAndroid Build Coastguard Worker htmlParserFinishElementParsing(ctxt);
1374*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1375*7c568831SAndroid Build Coastguard Worker ctxt->sax->endElement(ctxt->userData, ctxt->name);
1376*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
1377*7c568831SAndroid Build Coastguard Worker }
1378*7c568831SAndroid Build Coastguard Worker }
1379*7c568831SAndroid Build Coastguard Worker
1380*7c568831SAndroid Build Coastguard Worker /**
1381*7c568831SAndroid Build Coastguard Worker * htmlAutoCloseOnEnd:
1382*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
1383*7c568831SAndroid Build Coastguard Worker *
1384*7c568831SAndroid Build Coastguard Worker * Close all remaining tags at the end of the stream
1385*7c568831SAndroid Build Coastguard Worker */
1386*7c568831SAndroid Build Coastguard Worker static void
htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)1387*7c568831SAndroid Build Coastguard Worker htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1388*7c568831SAndroid Build Coastguard Worker {
1389*7c568831SAndroid Build Coastguard Worker int i;
1390*7c568831SAndroid Build Coastguard Worker
1391*7c568831SAndroid Build Coastguard Worker if (ctxt->options & HTML_PARSE_HTML5)
1392*7c568831SAndroid Build Coastguard Worker return;
1393*7c568831SAndroid Build Coastguard Worker
1394*7c568831SAndroid Build Coastguard Worker if (ctxt->nameNr == 0)
1395*7c568831SAndroid Build Coastguard Worker return;
1396*7c568831SAndroid Build Coastguard Worker for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1397*7c568831SAndroid Build Coastguard Worker htmlParserFinishElementParsing(ctxt);
1398*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1399*7c568831SAndroid Build Coastguard Worker ctxt->sax->endElement(ctxt->userData, ctxt->name);
1400*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
1401*7c568831SAndroid Build Coastguard Worker }
1402*7c568831SAndroid Build Coastguard Worker }
1403*7c568831SAndroid Build Coastguard Worker
1404*7c568831SAndroid Build Coastguard Worker /**
1405*7c568831SAndroid Build Coastguard Worker * htmlAutoClose:
1406*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
1407*7c568831SAndroid Build Coastguard Worker * @newtag: The new tag name or NULL
1408*7c568831SAndroid Build Coastguard Worker *
1409*7c568831SAndroid Build Coastguard Worker * The HTML DTD allows a tag to implicitly close other tags.
1410*7c568831SAndroid Build Coastguard Worker * The list is kept in htmlStartClose array. This function is
1411*7c568831SAndroid Build Coastguard Worker * called when a new tag has been detected and generates the
1412*7c568831SAndroid Build Coastguard Worker * appropriates closes if possible/needed.
1413*7c568831SAndroid Build Coastguard Worker * If newtag is NULL this mean we are at the end of the resource
1414*7c568831SAndroid Build Coastguard Worker * and we should check
1415*7c568831SAndroid Build Coastguard Worker */
1416*7c568831SAndroid Build Coastguard Worker static void
htmlAutoClose(htmlParserCtxtPtr ctxt,const xmlChar * newtag)1417*7c568831SAndroid Build Coastguard Worker htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1418*7c568831SAndroid Build Coastguard Worker {
1419*7c568831SAndroid Build Coastguard Worker if (ctxt->options & HTML_PARSE_HTML5)
1420*7c568831SAndroid Build Coastguard Worker return;
1421*7c568831SAndroid Build Coastguard Worker
1422*7c568831SAndroid Build Coastguard Worker if (newtag == NULL)
1423*7c568831SAndroid Build Coastguard Worker return;
1424*7c568831SAndroid Build Coastguard Worker
1425*7c568831SAndroid Build Coastguard Worker while ((ctxt->name != NULL) &&
1426*7c568831SAndroid Build Coastguard Worker (htmlCheckAutoClose(newtag, ctxt->name))) {
1427*7c568831SAndroid Build Coastguard Worker htmlParserFinishElementParsing(ctxt);
1428*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1429*7c568831SAndroid Build Coastguard Worker ctxt->sax->endElement(ctxt->userData, ctxt->name);
1430*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
1431*7c568831SAndroid Build Coastguard Worker }
1432*7c568831SAndroid Build Coastguard Worker }
1433*7c568831SAndroid Build Coastguard Worker
1434*7c568831SAndroid Build Coastguard Worker /**
1435*7c568831SAndroid Build Coastguard Worker * htmlAutoCloseTag:
1436*7c568831SAndroid Build Coastguard Worker * @doc: the HTML document
1437*7c568831SAndroid Build Coastguard Worker * @name: The tag name
1438*7c568831SAndroid Build Coastguard Worker * @elem: the HTML element
1439*7c568831SAndroid Build Coastguard Worker *
1440*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Internal function, don't use.
1441*7c568831SAndroid Build Coastguard Worker *
1442*7c568831SAndroid Build Coastguard Worker * The HTML DTD allows a tag to implicitly close other tags.
1443*7c568831SAndroid Build Coastguard Worker * The list is kept in htmlStartClose array. This function checks
1444*7c568831SAndroid Build Coastguard Worker * if the element or one of it's children would autoclose the
1445*7c568831SAndroid Build Coastguard Worker * given tag.
1446*7c568831SAndroid Build Coastguard Worker *
1447*7c568831SAndroid Build Coastguard Worker * Returns 1 if autoclose, 0 otherwise
1448*7c568831SAndroid Build Coastguard Worker */
1449*7c568831SAndroid Build Coastguard Worker int
htmlAutoCloseTag(htmlDocPtr doc,const xmlChar * name,htmlNodePtr elem)1450*7c568831SAndroid Build Coastguard Worker htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
1451*7c568831SAndroid Build Coastguard Worker htmlNodePtr child;
1452*7c568831SAndroid Build Coastguard Worker
1453*7c568831SAndroid Build Coastguard Worker if (elem == NULL) return(1);
1454*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(name, elem->name)) return(0);
1455*7c568831SAndroid Build Coastguard Worker if (htmlCheckAutoClose(elem->name, name)) return(1);
1456*7c568831SAndroid Build Coastguard Worker child = elem->children;
1457*7c568831SAndroid Build Coastguard Worker while (child != NULL) {
1458*7c568831SAndroid Build Coastguard Worker if (htmlAutoCloseTag(doc, name, child)) return(1);
1459*7c568831SAndroid Build Coastguard Worker child = child->next;
1460*7c568831SAndroid Build Coastguard Worker }
1461*7c568831SAndroid Build Coastguard Worker return(0);
1462*7c568831SAndroid Build Coastguard Worker }
1463*7c568831SAndroid Build Coastguard Worker
1464*7c568831SAndroid Build Coastguard Worker /**
1465*7c568831SAndroid Build Coastguard Worker * htmlIsAutoClosed:
1466*7c568831SAndroid Build Coastguard Worker * @doc: the HTML document
1467*7c568831SAndroid Build Coastguard Worker * @elem: the HTML element
1468*7c568831SAndroid Build Coastguard Worker *
1469*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Internal function, don't use.
1470*7c568831SAndroid Build Coastguard Worker *
1471*7c568831SAndroid Build Coastguard Worker * The HTML DTD allows a tag to implicitly close other tags.
1472*7c568831SAndroid Build Coastguard Worker * The list is kept in htmlStartClose array. This function checks
1473*7c568831SAndroid Build Coastguard Worker * if a tag is autoclosed by one of it's child
1474*7c568831SAndroid Build Coastguard Worker *
1475*7c568831SAndroid Build Coastguard Worker * Returns 1 if autoclosed, 0 otherwise
1476*7c568831SAndroid Build Coastguard Worker */
1477*7c568831SAndroid Build Coastguard Worker int
htmlIsAutoClosed(htmlDocPtr doc,htmlNodePtr elem)1478*7c568831SAndroid Build Coastguard Worker htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
1479*7c568831SAndroid Build Coastguard Worker htmlNodePtr child;
1480*7c568831SAndroid Build Coastguard Worker
1481*7c568831SAndroid Build Coastguard Worker if (elem == NULL) return(1);
1482*7c568831SAndroid Build Coastguard Worker child = elem->children;
1483*7c568831SAndroid Build Coastguard Worker while (child != NULL) {
1484*7c568831SAndroid Build Coastguard Worker if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
1485*7c568831SAndroid Build Coastguard Worker child = child->next;
1486*7c568831SAndroid Build Coastguard Worker }
1487*7c568831SAndroid Build Coastguard Worker return(0);
1488*7c568831SAndroid Build Coastguard Worker }
1489*7c568831SAndroid Build Coastguard Worker
1490*7c568831SAndroid Build Coastguard Worker /**
1491*7c568831SAndroid Build Coastguard Worker * htmlCheckImplied:
1492*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
1493*7c568831SAndroid Build Coastguard Worker * @newtag: The new tag name
1494*7c568831SAndroid Build Coastguard Worker *
1495*7c568831SAndroid Build Coastguard Worker * The HTML DTD allows a tag to exists only implicitly
1496*7c568831SAndroid Build Coastguard Worker * called when a new tag has been detected and generates the
1497*7c568831SAndroid Build Coastguard Worker * appropriates implicit tags if missing
1498*7c568831SAndroid Build Coastguard Worker */
1499*7c568831SAndroid Build Coastguard Worker static void
htmlCheckImplied(htmlParserCtxtPtr ctxt,const xmlChar * newtag)1500*7c568831SAndroid Build Coastguard Worker htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1501*7c568831SAndroid Build Coastguard Worker int i;
1502*7c568831SAndroid Build Coastguard Worker
1503*7c568831SAndroid Build Coastguard Worker if (ctxt->options & (HTML_PARSE_NOIMPLIED | HTML_PARSE_HTML5))
1504*7c568831SAndroid Build Coastguard Worker return;
1505*7c568831SAndroid Build Coastguard Worker if (!htmlOmittedDefaultValue)
1506*7c568831SAndroid Build Coastguard Worker return;
1507*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(newtag, BAD_CAST"html"))
1508*7c568831SAndroid Build Coastguard Worker return;
1509*7c568831SAndroid Build Coastguard Worker if (ctxt->nameNr <= 0) {
1510*7c568831SAndroid Build Coastguard Worker htmlnamePush(ctxt, BAD_CAST"html");
1511*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1512*7c568831SAndroid Build Coastguard Worker ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
1513*7c568831SAndroid Build Coastguard Worker }
1514*7c568831SAndroid Build Coastguard Worker if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head")))
1515*7c568831SAndroid Build Coastguard Worker return;
1516*7c568831SAndroid Build Coastguard Worker if ((ctxt->nameNr <= 1) &&
1517*7c568831SAndroid Build Coastguard Worker ((xmlStrEqual(newtag, BAD_CAST"script")) ||
1518*7c568831SAndroid Build Coastguard Worker (xmlStrEqual(newtag, BAD_CAST"style")) ||
1519*7c568831SAndroid Build Coastguard Worker (xmlStrEqual(newtag, BAD_CAST"meta")) ||
1520*7c568831SAndroid Build Coastguard Worker (xmlStrEqual(newtag, BAD_CAST"link")) ||
1521*7c568831SAndroid Build Coastguard Worker (xmlStrEqual(newtag, BAD_CAST"title")) ||
1522*7c568831SAndroid Build Coastguard Worker (xmlStrEqual(newtag, BAD_CAST"base")))) {
1523*7c568831SAndroid Build Coastguard Worker if (ctxt->html >= 3) {
1524*7c568831SAndroid Build Coastguard Worker /* we already saw or generated an <head> before */
1525*7c568831SAndroid Build Coastguard Worker return;
1526*7c568831SAndroid Build Coastguard Worker }
1527*7c568831SAndroid Build Coastguard Worker /*
1528*7c568831SAndroid Build Coastguard Worker * dropped OBJECT ... i you put it first BODY will be
1529*7c568831SAndroid Build Coastguard Worker * assumed !
1530*7c568831SAndroid Build Coastguard Worker */
1531*7c568831SAndroid Build Coastguard Worker htmlnamePush(ctxt, BAD_CAST"head");
1532*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1533*7c568831SAndroid Build Coastguard Worker ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
1534*7c568831SAndroid Build Coastguard Worker } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&
1535*7c568831SAndroid Build Coastguard Worker (!xmlStrEqual(newtag, BAD_CAST"frame")) &&
1536*7c568831SAndroid Build Coastguard Worker (!xmlStrEqual(newtag, BAD_CAST"frameset"))) {
1537*7c568831SAndroid Build Coastguard Worker if (ctxt->html >= 10) {
1538*7c568831SAndroid Build Coastguard Worker /* we already saw or generated a <body> before */
1539*7c568831SAndroid Build Coastguard Worker return;
1540*7c568831SAndroid Build Coastguard Worker }
1541*7c568831SAndroid Build Coastguard Worker for (i = 0;i < ctxt->nameNr;i++) {
1542*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
1543*7c568831SAndroid Build Coastguard Worker return;
1544*7c568831SAndroid Build Coastguard Worker }
1545*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
1546*7c568831SAndroid Build Coastguard Worker return;
1547*7c568831SAndroid Build Coastguard Worker }
1548*7c568831SAndroid Build Coastguard Worker }
1549*7c568831SAndroid Build Coastguard Worker
1550*7c568831SAndroid Build Coastguard Worker htmlnamePush(ctxt, BAD_CAST"body");
1551*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1552*7c568831SAndroid Build Coastguard Worker ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
1553*7c568831SAndroid Build Coastguard Worker }
1554*7c568831SAndroid Build Coastguard Worker }
1555*7c568831SAndroid Build Coastguard Worker
1556*7c568831SAndroid Build Coastguard Worker /**
1557*7c568831SAndroid Build Coastguard Worker * htmlCheckParagraph
1558*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
1559*7c568831SAndroid Build Coastguard Worker *
1560*7c568831SAndroid Build Coastguard Worker * Check whether a p element need to be implied before inserting
1561*7c568831SAndroid Build Coastguard Worker * characters in the current element.
1562*7c568831SAndroid Build Coastguard Worker *
1563*7c568831SAndroid Build Coastguard Worker * Returns 1 if a paragraph has been inserted, 0 if not and -1
1564*7c568831SAndroid Build Coastguard Worker * in case of error.
1565*7c568831SAndroid Build Coastguard Worker */
1566*7c568831SAndroid Build Coastguard Worker
1567*7c568831SAndroid Build Coastguard Worker static int
htmlCheckParagraph(htmlParserCtxtPtr ctxt)1568*7c568831SAndroid Build Coastguard Worker htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1569*7c568831SAndroid Build Coastguard Worker const xmlChar *tag;
1570*7c568831SAndroid Build Coastguard Worker int i;
1571*7c568831SAndroid Build Coastguard Worker
1572*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
1573*7c568831SAndroid Build Coastguard Worker return(-1);
1574*7c568831SAndroid Build Coastguard Worker if (ctxt->options & HTML_PARSE_HTML5)
1575*7c568831SAndroid Build Coastguard Worker return(0);
1576*7c568831SAndroid Build Coastguard Worker
1577*7c568831SAndroid Build Coastguard Worker tag = ctxt->name;
1578*7c568831SAndroid Build Coastguard Worker if (tag == NULL) {
1579*7c568831SAndroid Build Coastguard Worker htmlAutoClose(ctxt, BAD_CAST"p");
1580*7c568831SAndroid Build Coastguard Worker htmlCheckImplied(ctxt, BAD_CAST"p");
1581*7c568831SAndroid Build Coastguard Worker htmlnamePush(ctxt, BAD_CAST"p");
1582*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1583*7c568831SAndroid Build Coastguard Worker ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1584*7c568831SAndroid Build Coastguard Worker return(1);
1585*7c568831SAndroid Build Coastguard Worker }
1586*7c568831SAndroid Build Coastguard Worker if (!htmlOmittedDefaultValue)
1587*7c568831SAndroid Build Coastguard Worker return(0);
1588*7c568831SAndroid Build Coastguard Worker for (i = 0; htmlNoContentElements[i] != NULL; i++) {
1589*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) {
1590*7c568831SAndroid Build Coastguard Worker htmlAutoClose(ctxt, BAD_CAST"p");
1591*7c568831SAndroid Build Coastguard Worker htmlCheckImplied(ctxt, BAD_CAST"p");
1592*7c568831SAndroid Build Coastguard Worker htmlnamePush(ctxt, BAD_CAST"p");
1593*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1594*7c568831SAndroid Build Coastguard Worker ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1595*7c568831SAndroid Build Coastguard Worker return(1);
1596*7c568831SAndroid Build Coastguard Worker }
1597*7c568831SAndroid Build Coastguard Worker }
1598*7c568831SAndroid Build Coastguard Worker return(0);
1599*7c568831SAndroid Build Coastguard Worker }
1600*7c568831SAndroid Build Coastguard Worker
1601*7c568831SAndroid Build Coastguard Worker /**
1602*7c568831SAndroid Build Coastguard Worker * htmlIsScriptAttribute:
1603*7c568831SAndroid Build Coastguard Worker * @name: an attribute name
1604*7c568831SAndroid Build Coastguard Worker *
1605*7c568831SAndroid Build Coastguard Worker * Check if an attribute is of content type Script
1606*7c568831SAndroid Build Coastguard Worker *
1607*7c568831SAndroid Build Coastguard Worker * Returns 1 is the attribute is a script 0 otherwise
1608*7c568831SAndroid Build Coastguard Worker */
1609*7c568831SAndroid Build Coastguard Worker int
htmlIsScriptAttribute(const xmlChar * name)1610*7c568831SAndroid Build Coastguard Worker htmlIsScriptAttribute(const xmlChar *name) {
1611*7c568831SAndroid Build Coastguard Worker unsigned int i;
1612*7c568831SAndroid Build Coastguard Worker
1613*7c568831SAndroid Build Coastguard Worker if (name == NULL)
1614*7c568831SAndroid Build Coastguard Worker return(0);
1615*7c568831SAndroid Build Coastguard Worker /*
1616*7c568831SAndroid Build Coastguard Worker * all script attributes start with 'on'
1617*7c568831SAndroid Build Coastguard Worker */
1618*7c568831SAndroid Build Coastguard Worker if ((name[0] != 'o') || (name[1] != 'n'))
1619*7c568831SAndroid Build Coastguard Worker return(0);
1620*7c568831SAndroid Build Coastguard Worker for (i = 0;
1621*7c568831SAndroid Build Coastguard Worker i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);
1622*7c568831SAndroid Build Coastguard Worker i++) {
1623*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i]))
1624*7c568831SAndroid Build Coastguard Worker return(1);
1625*7c568831SAndroid Build Coastguard Worker }
1626*7c568831SAndroid Build Coastguard Worker return(0);
1627*7c568831SAndroid Build Coastguard Worker }
1628*7c568831SAndroid Build Coastguard Worker
1629*7c568831SAndroid Build Coastguard Worker /************************************************************************
1630*7c568831SAndroid Build Coastguard Worker * *
1631*7c568831SAndroid Build Coastguard Worker * The list of HTML predefined entities *
1632*7c568831SAndroid Build Coastguard Worker * *
1633*7c568831SAndroid Build Coastguard Worker ************************************************************************/
1634*7c568831SAndroid Build Coastguard Worker
1635*7c568831SAndroid Build Coastguard Worker
1636*7c568831SAndroid Build Coastguard Worker static const htmlEntityDesc html40EntitiesTable[] = {
1637*7c568831SAndroid Build Coastguard Worker /*
1638*7c568831SAndroid Build Coastguard Worker * the 4 absolute ones, plus apostrophe.
1639*7c568831SAndroid Build Coastguard Worker */
1640*7c568831SAndroid Build Coastguard Worker { 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" },
1641*7c568831SAndroid Build Coastguard Worker { 38, "amp", "ampersand, U+0026 ISOnum" },
1642*7c568831SAndroid Build Coastguard Worker { 39, "apos", "single quote" },
1643*7c568831SAndroid Build Coastguard Worker { 60, "lt", "less-than sign, U+003C ISOnum" },
1644*7c568831SAndroid Build Coastguard Worker { 62, "gt", "greater-than sign, U+003E ISOnum" },
1645*7c568831SAndroid Build Coastguard Worker
1646*7c568831SAndroid Build Coastguard Worker /*
1647*7c568831SAndroid Build Coastguard Worker * A bunch still in the 128-255 range
1648*7c568831SAndroid Build Coastguard Worker * Replacing them depend really on the charset used.
1649*7c568831SAndroid Build Coastguard Worker */
1650*7c568831SAndroid Build Coastguard Worker { 160, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" },
1651*7c568831SAndroid Build Coastguard Worker { 161, "iexcl","inverted exclamation mark, U+00A1 ISOnum" },
1652*7c568831SAndroid Build Coastguard Worker { 162, "cent", "cent sign, U+00A2 ISOnum" },
1653*7c568831SAndroid Build Coastguard Worker { 163, "pound","pound sign, U+00A3 ISOnum" },
1654*7c568831SAndroid Build Coastguard Worker { 164, "curren","currency sign, U+00A4 ISOnum" },
1655*7c568831SAndroid Build Coastguard Worker { 165, "yen", "yen sign = yuan sign, U+00A5 ISOnum" },
1656*7c568831SAndroid Build Coastguard Worker { 166, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" },
1657*7c568831SAndroid Build Coastguard Worker { 167, "sect", "section sign, U+00A7 ISOnum" },
1658*7c568831SAndroid Build Coastguard Worker { 168, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" },
1659*7c568831SAndroid Build Coastguard Worker { 169, "copy", "copyright sign, U+00A9 ISOnum" },
1660*7c568831SAndroid Build Coastguard Worker { 170, "ordf", "feminine ordinal indicator, U+00AA ISOnum" },
1661*7c568831SAndroid Build Coastguard Worker { 171, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
1662*7c568831SAndroid Build Coastguard Worker { 172, "not", "not sign, U+00AC ISOnum" },
1663*7c568831SAndroid Build Coastguard Worker { 173, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" },
1664*7c568831SAndroid Build Coastguard Worker { 174, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" },
1665*7c568831SAndroid Build Coastguard Worker { 175, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
1666*7c568831SAndroid Build Coastguard Worker { 176, "deg", "degree sign, U+00B0 ISOnum" },
1667*7c568831SAndroid Build Coastguard Worker { 177, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
1668*7c568831SAndroid Build Coastguard Worker { 178, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" },
1669*7c568831SAndroid Build Coastguard Worker { 179, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
1670*7c568831SAndroid Build Coastguard Worker { 180, "acute","acute accent = spacing acute, U+00B4 ISOdia" },
1671*7c568831SAndroid Build Coastguard Worker { 181, "micro","micro sign, U+00B5 ISOnum" },
1672*7c568831SAndroid Build Coastguard Worker { 182, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" },
1673*7c568831SAndroid Build Coastguard Worker { 183, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
1674*7c568831SAndroid Build Coastguard Worker { 184, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" },
1675*7c568831SAndroid Build Coastguard Worker { 185, "sup1", "superscript one = superscript digit one, U+00B9 ISOnum" },
1676*7c568831SAndroid Build Coastguard Worker { 186, "ordm", "masculine ordinal indicator, U+00BA ISOnum" },
1677*7c568831SAndroid Build Coastguard Worker { 187, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
1678*7c568831SAndroid Build Coastguard Worker { 188, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
1679*7c568831SAndroid Build Coastguard Worker { 189, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" },
1680*7c568831SAndroid Build Coastguard Worker { 190, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
1681*7c568831SAndroid Build Coastguard Worker { 191, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" },
1682*7c568831SAndroid Build Coastguard Worker { 192, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
1683*7c568831SAndroid Build Coastguard Worker { 193, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" },
1684*7c568831SAndroid Build Coastguard Worker { 194, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" },
1685*7c568831SAndroid Build Coastguard Worker { 195, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" },
1686*7c568831SAndroid Build Coastguard Worker { 196, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" },
1687*7c568831SAndroid Build Coastguard Worker { 197, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
1688*7c568831SAndroid Build Coastguard Worker { 198, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
1689*7c568831SAndroid Build Coastguard Worker { 199, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" },
1690*7c568831SAndroid Build Coastguard Worker { 200, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" },
1691*7c568831SAndroid Build Coastguard Worker { 201, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" },
1692*7c568831SAndroid Build Coastguard Worker { 202, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" },
1693*7c568831SAndroid Build Coastguard Worker { 203, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" },
1694*7c568831SAndroid Build Coastguard Worker { 204, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" },
1695*7c568831SAndroid Build Coastguard Worker { 205, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" },
1696*7c568831SAndroid Build Coastguard Worker { 206, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" },
1697*7c568831SAndroid Build Coastguard Worker { 207, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" },
1698*7c568831SAndroid Build Coastguard Worker { 208, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" },
1699*7c568831SAndroid Build Coastguard Worker { 209, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" },
1700*7c568831SAndroid Build Coastguard Worker { 210, "Ograve","latin capital letter O with grave, U+00D2 ISOlat1" },
1701*7c568831SAndroid Build Coastguard Worker { 211, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" },
1702*7c568831SAndroid Build Coastguard Worker { 212, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" },
1703*7c568831SAndroid Build Coastguard Worker { 213, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" },
1704*7c568831SAndroid Build Coastguard Worker { 214, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" },
1705*7c568831SAndroid Build Coastguard Worker { 215, "times","multiplication sign, U+00D7 ISOnum" },
1706*7c568831SAndroid Build Coastguard Worker { 216, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
1707*7c568831SAndroid Build Coastguard Worker { 217, "Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" },
1708*7c568831SAndroid Build Coastguard Worker { 218, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" },
1709*7c568831SAndroid Build Coastguard Worker { 219, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" },
1710*7c568831SAndroid Build Coastguard Worker { 220, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" },
1711*7c568831SAndroid Build Coastguard Worker { 221, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" },
1712*7c568831SAndroid Build Coastguard Worker { 222, "THORN","latin capital letter THORN, U+00DE ISOlat1" },
1713*7c568831SAndroid Build Coastguard Worker { 223, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
1714*7c568831SAndroid Build Coastguard Worker { 224, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
1715*7c568831SAndroid Build Coastguard Worker { 225, "aacute","latin small letter a with acute, U+00E1 ISOlat1" },
1716*7c568831SAndroid Build Coastguard Worker { 226, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" },
1717*7c568831SAndroid Build Coastguard Worker { 227, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" },
1718*7c568831SAndroid Build Coastguard Worker { 228, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" },
1719*7c568831SAndroid Build Coastguard Worker { 229, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
1720*7c568831SAndroid Build Coastguard Worker { 230, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
1721*7c568831SAndroid Build Coastguard Worker { 231, "ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" },
1722*7c568831SAndroid Build Coastguard Worker { 232, "egrave","latin small letter e with grave, U+00E8 ISOlat1" },
1723*7c568831SAndroid Build Coastguard Worker { 233, "eacute","latin small letter e with acute, U+00E9 ISOlat1" },
1724*7c568831SAndroid Build Coastguard Worker { 234, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" },
1725*7c568831SAndroid Build Coastguard Worker { 235, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" },
1726*7c568831SAndroid Build Coastguard Worker { 236, "igrave","latin small letter i with grave, U+00EC ISOlat1" },
1727*7c568831SAndroid Build Coastguard Worker { 237, "iacute","latin small letter i with acute, U+00ED ISOlat1" },
1728*7c568831SAndroid Build Coastguard Worker { 238, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" },
1729*7c568831SAndroid Build Coastguard Worker { 239, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" },
1730*7c568831SAndroid Build Coastguard Worker { 240, "eth", "latin small letter eth, U+00F0 ISOlat1" },
1731*7c568831SAndroid Build Coastguard Worker { 241, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" },
1732*7c568831SAndroid Build Coastguard Worker { 242, "ograve","latin small letter o with grave, U+00F2 ISOlat1" },
1733*7c568831SAndroid Build Coastguard Worker { 243, "oacute","latin small letter o with acute, U+00F3 ISOlat1" },
1734*7c568831SAndroid Build Coastguard Worker { 244, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" },
1735*7c568831SAndroid Build Coastguard Worker { 245, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" },
1736*7c568831SAndroid Build Coastguard Worker { 246, "ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" },
1737*7c568831SAndroid Build Coastguard Worker { 247, "divide","division sign, U+00F7 ISOnum" },
1738*7c568831SAndroid Build Coastguard Worker { 248, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
1739*7c568831SAndroid Build Coastguard Worker { 249, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" },
1740*7c568831SAndroid Build Coastguard Worker { 250, "uacute","latin small letter u with acute, U+00FA ISOlat1" },
1741*7c568831SAndroid Build Coastguard Worker { 251, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" },
1742*7c568831SAndroid Build Coastguard Worker { 252, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" },
1743*7c568831SAndroid Build Coastguard Worker { 253, "yacute","latin small letter y with acute, U+00FD ISOlat1" },
1744*7c568831SAndroid Build Coastguard Worker { 254, "thorn","latin small letter thorn with, U+00FE ISOlat1" },
1745*7c568831SAndroid Build Coastguard Worker { 255, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" },
1746*7c568831SAndroid Build Coastguard Worker
1747*7c568831SAndroid Build Coastguard Worker { 338, "OElig","latin capital ligature OE, U+0152 ISOlat2" },
1748*7c568831SAndroid Build Coastguard Worker { 339, "oelig","latin small ligature oe, U+0153 ISOlat2" },
1749*7c568831SAndroid Build Coastguard Worker { 352, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" },
1750*7c568831SAndroid Build Coastguard Worker { 353, "scaron","latin small letter s with caron, U+0161 ISOlat2" },
1751*7c568831SAndroid Build Coastguard Worker { 376, "Yuml", "latin capital letter Y with diaeresis, U+0178 ISOlat2" },
1752*7c568831SAndroid Build Coastguard Worker
1753*7c568831SAndroid Build Coastguard Worker /*
1754*7c568831SAndroid Build Coastguard Worker * Anything below should really be kept as entities references
1755*7c568831SAndroid Build Coastguard Worker */
1756*7c568831SAndroid Build Coastguard Worker { 402, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" },
1757*7c568831SAndroid Build Coastguard Worker
1758*7c568831SAndroid Build Coastguard Worker { 710, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" },
1759*7c568831SAndroid Build Coastguard Worker { 732, "tilde","small tilde, U+02DC ISOdia" },
1760*7c568831SAndroid Build Coastguard Worker
1761*7c568831SAndroid Build Coastguard Worker { 913, "Alpha","greek capital letter alpha, U+0391" },
1762*7c568831SAndroid Build Coastguard Worker { 914, "Beta", "greek capital letter beta, U+0392" },
1763*7c568831SAndroid Build Coastguard Worker { 915, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" },
1764*7c568831SAndroid Build Coastguard Worker { 916, "Delta","greek capital letter delta, U+0394 ISOgrk3" },
1765*7c568831SAndroid Build Coastguard Worker { 917, "Epsilon","greek capital letter epsilon, U+0395" },
1766*7c568831SAndroid Build Coastguard Worker { 918, "Zeta", "greek capital letter zeta, U+0396" },
1767*7c568831SAndroid Build Coastguard Worker { 919, "Eta", "greek capital letter eta, U+0397" },
1768*7c568831SAndroid Build Coastguard Worker { 920, "Theta","greek capital letter theta, U+0398 ISOgrk3" },
1769*7c568831SAndroid Build Coastguard Worker { 921, "Iota", "greek capital letter iota, U+0399" },
1770*7c568831SAndroid Build Coastguard Worker { 922, "Kappa","greek capital letter kappa, U+039A" },
1771*7c568831SAndroid Build Coastguard Worker { 923, "Lambda", "greek capital letter lambda, U+039B ISOgrk3" },
1772*7c568831SAndroid Build Coastguard Worker { 924, "Mu", "greek capital letter mu, U+039C" },
1773*7c568831SAndroid Build Coastguard Worker { 925, "Nu", "greek capital letter nu, U+039D" },
1774*7c568831SAndroid Build Coastguard Worker { 926, "Xi", "greek capital letter xi, U+039E ISOgrk3" },
1775*7c568831SAndroid Build Coastguard Worker { 927, "Omicron","greek capital letter omicron, U+039F" },
1776*7c568831SAndroid Build Coastguard Worker { 928, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" },
1777*7c568831SAndroid Build Coastguard Worker { 929, "Rho", "greek capital letter rho, U+03A1" },
1778*7c568831SAndroid Build Coastguard Worker { 931, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" },
1779*7c568831SAndroid Build Coastguard Worker { 932, "Tau", "greek capital letter tau, U+03A4" },
1780*7c568831SAndroid Build Coastguard Worker { 933, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" },
1781*7c568831SAndroid Build Coastguard Worker { 934, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" },
1782*7c568831SAndroid Build Coastguard Worker { 935, "Chi", "greek capital letter chi, U+03A7" },
1783*7c568831SAndroid Build Coastguard Worker { 936, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" },
1784*7c568831SAndroid Build Coastguard Worker { 937, "Omega","greek capital letter omega, U+03A9 ISOgrk3" },
1785*7c568831SAndroid Build Coastguard Worker
1786*7c568831SAndroid Build Coastguard Worker { 945, "alpha","greek small letter alpha, U+03B1 ISOgrk3" },
1787*7c568831SAndroid Build Coastguard Worker { 946, "beta", "greek small letter beta, U+03B2 ISOgrk3" },
1788*7c568831SAndroid Build Coastguard Worker { 947, "gamma","greek small letter gamma, U+03B3 ISOgrk3" },
1789*7c568831SAndroid Build Coastguard Worker { 948, "delta","greek small letter delta, U+03B4 ISOgrk3" },
1790*7c568831SAndroid Build Coastguard Worker { 949, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" },
1791*7c568831SAndroid Build Coastguard Worker { 950, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" },
1792*7c568831SAndroid Build Coastguard Worker { 951, "eta", "greek small letter eta, U+03B7 ISOgrk3" },
1793*7c568831SAndroid Build Coastguard Worker { 952, "theta","greek small letter theta, U+03B8 ISOgrk3" },
1794*7c568831SAndroid Build Coastguard Worker { 953, "iota", "greek small letter iota, U+03B9 ISOgrk3" },
1795*7c568831SAndroid Build Coastguard Worker { 954, "kappa","greek small letter kappa, U+03BA ISOgrk3" },
1796*7c568831SAndroid Build Coastguard Worker { 955, "lambda","greek small letter lambda, U+03BB ISOgrk3" },
1797*7c568831SAndroid Build Coastguard Worker { 956, "mu", "greek small letter mu, U+03BC ISOgrk3" },
1798*7c568831SAndroid Build Coastguard Worker { 957, "nu", "greek small letter nu, U+03BD ISOgrk3" },
1799*7c568831SAndroid Build Coastguard Worker { 958, "xi", "greek small letter xi, U+03BE ISOgrk3" },
1800*7c568831SAndroid Build Coastguard Worker { 959, "omicron","greek small letter omicron, U+03BF NEW" },
1801*7c568831SAndroid Build Coastguard Worker { 960, "pi", "greek small letter pi, U+03C0 ISOgrk3" },
1802*7c568831SAndroid Build Coastguard Worker { 961, "rho", "greek small letter rho, U+03C1 ISOgrk3" },
1803*7c568831SAndroid Build Coastguard Worker { 962, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" },
1804*7c568831SAndroid Build Coastguard Worker { 963, "sigma","greek small letter sigma, U+03C3 ISOgrk3" },
1805*7c568831SAndroid Build Coastguard Worker { 964, "tau", "greek small letter tau, U+03C4 ISOgrk3" },
1806*7c568831SAndroid Build Coastguard Worker { 965, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" },
1807*7c568831SAndroid Build Coastguard Worker { 966, "phi", "greek small letter phi, U+03C6 ISOgrk3" },
1808*7c568831SAndroid Build Coastguard Worker { 967, "chi", "greek small letter chi, U+03C7 ISOgrk3" },
1809*7c568831SAndroid Build Coastguard Worker { 968, "psi", "greek small letter psi, U+03C8 ISOgrk3" },
1810*7c568831SAndroid Build Coastguard Worker { 969, "omega","greek small letter omega, U+03C9 ISOgrk3" },
1811*7c568831SAndroid Build Coastguard Worker { 977, "thetasym","greek small letter theta symbol, U+03D1 NEW" },
1812*7c568831SAndroid Build Coastguard Worker { 978, "upsih","greek upsilon with hook symbol, U+03D2 NEW" },
1813*7c568831SAndroid Build Coastguard Worker { 982, "piv", "greek pi symbol, U+03D6 ISOgrk3" },
1814*7c568831SAndroid Build Coastguard Worker
1815*7c568831SAndroid Build Coastguard Worker { 8194, "ensp", "en space, U+2002 ISOpub" },
1816*7c568831SAndroid Build Coastguard Worker { 8195, "emsp", "em space, U+2003 ISOpub" },
1817*7c568831SAndroid Build Coastguard Worker { 8201, "thinsp","thin space, U+2009 ISOpub" },
1818*7c568831SAndroid Build Coastguard Worker { 8204, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" },
1819*7c568831SAndroid Build Coastguard Worker { 8205, "zwj", "zero width joiner, U+200D NEW RFC 2070" },
1820*7c568831SAndroid Build Coastguard Worker { 8206, "lrm", "left-to-right mark, U+200E NEW RFC 2070" },
1821*7c568831SAndroid Build Coastguard Worker { 8207, "rlm", "right-to-left mark, U+200F NEW RFC 2070" },
1822*7c568831SAndroid Build Coastguard Worker { 8211, "ndash","en dash, U+2013 ISOpub" },
1823*7c568831SAndroid Build Coastguard Worker { 8212, "mdash","em dash, U+2014 ISOpub" },
1824*7c568831SAndroid Build Coastguard Worker { 8216, "lsquo","left single quotation mark, U+2018 ISOnum" },
1825*7c568831SAndroid Build Coastguard Worker { 8217, "rsquo","right single quotation mark, U+2019 ISOnum" },
1826*7c568831SAndroid Build Coastguard Worker { 8218, "sbquo","single low-9 quotation mark, U+201A NEW" },
1827*7c568831SAndroid Build Coastguard Worker { 8220, "ldquo","left double quotation mark, U+201C ISOnum" },
1828*7c568831SAndroid Build Coastguard Worker { 8221, "rdquo","right double quotation mark, U+201D ISOnum" },
1829*7c568831SAndroid Build Coastguard Worker { 8222, "bdquo","double low-9 quotation mark, U+201E NEW" },
1830*7c568831SAndroid Build Coastguard Worker { 8224, "dagger","dagger, U+2020 ISOpub" },
1831*7c568831SAndroid Build Coastguard Worker { 8225, "Dagger","double dagger, U+2021 ISOpub" },
1832*7c568831SAndroid Build Coastguard Worker
1833*7c568831SAndroid Build Coastguard Worker { 8226, "bull", "bullet = black small circle, U+2022 ISOpub" },
1834*7c568831SAndroid Build Coastguard Worker { 8230, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" },
1835*7c568831SAndroid Build Coastguard Worker
1836*7c568831SAndroid Build Coastguard Worker { 8240, "permil","per mille sign, U+2030 ISOtech" },
1837*7c568831SAndroid Build Coastguard Worker
1838*7c568831SAndroid Build Coastguard Worker { 8242, "prime","prime = minutes = feet, U+2032 ISOtech" },
1839*7c568831SAndroid Build Coastguard Worker { 8243, "Prime","double prime = seconds = inches, U+2033 ISOtech" },
1840*7c568831SAndroid Build Coastguard Worker
1841*7c568831SAndroid Build Coastguard Worker { 8249, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" },
1842*7c568831SAndroid Build Coastguard Worker { 8250, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" },
1843*7c568831SAndroid Build Coastguard Worker
1844*7c568831SAndroid Build Coastguard Worker { 8254, "oline","overline = spacing overscore, U+203E NEW" },
1845*7c568831SAndroid Build Coastguard Worker { 8260, "frasl","fraction slash, U+2044 NEW" },
1846*7c568831SAndroid Build Coastguard Worker
1847*7c568831SAndroid Build Coastguard Worker { 8364, "euro", "euro sign, U+20AC NEW" },
1848*7c568831SAndroid Build Coastguard Worker
1849*7c568831SAndroid Build Coastguard Worker { 8465, "image","blackletter capital I = imaginary part, U+2111 ISOamso" },
1850*7c568831SAndroid Build Coastguard Worker { 8472, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" },
1851*7c568831SAndroid Build Coastguard Worker { 8476, "real", "blackletter capital R = real part symbol, U+211C ISOamso" },
1852*7c568831SAndroid Build Coastguard Worker { 8482, "trade","trade mark sign, U+2122 ISOnum" },
1853*7c568831SAndroid Build Coastguard Worker { 8501, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" },
1854*7c568831SAndroid Build Coastguard Worker { 8592, "larr", "leftwards arrow, U+2190 ISOnum" },
1855*7c568831SAndroid Build Coastguard Worker { 8593, "uarr", "upwards arrow, U+2191 ISOnum" },
1856*7c568831SAndroid Build Coastguard Worker { 8594, "rarr", "rightwards arrow, U+2192 ISOnum" },
1857*7c568831SAndroid Build Coastguard Worker { 8595, "darr", "downwards arrow, U+2193 ISOnum" },
1858*7c568831SAndroid Build Coastguard Worker { 8596, "harr", "left right arrow, U+2194 ISOamsa" },
1859*7c568831SAndroid Build Coastguard Worker { 8629, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
1860*7c568831SAndroid Build Coastguard Worker { 8656, "lArr", "leftwards double arrow, U+21D0 ISOtech" },
1861*7c568831SAndroid Build Coastguard Worker { 8657, "uArr", "upwards double arrow, U+21D1 ISOamsa" },
1862*7c568831SAndroid Build Coastguard Worker { 8658, "rArr", "rightwards double arrow, U+21D2 ISOtech" },
1863*7c568831SAndroid Build Coastguard Worker { 8659, "dArr", "downwards double arrow, U+21D3 ISOamsa" },
1864*7c568831SAndroid Build Coastguard Worker { 8660, "hArr", "left right double arrow, U+21D4 ISOamsa" },
1865*7c568831SAndroid Build Coastguard Worker
1866*7c568831SAndroid Build Coastguard Worker { 8704, "forall","for all, U+2200 ISOtech" },
1867*7c568831SAndroid Build Coastguard Worker { 8706, "part", "partial differential, U+2202 ISOtech" },
1868*7c568831SAndroid Build Coastguard Worker { 8707, "exist","there exists, U+2203 ISOtech" },
1869*7c568831SAndroid Build Coastguard Worker { 8709, "empty","empty set = null set = diameter, U+2205 ISOamso" },
1870*7c568831SAndroid Build Coastguard Worker { 8711, "nabla","nabla = backward difference, U+2207 ISOtech" },
1871*7c568831SAndroid Build Coastguard Worker { 8712, "isin", "element of, U+2208 ISOtech" },
1872*7c568831SAndroid Build Coastguard Worker { 8713, "notin","not an element of, U+2209 ISOtech" },
1873*7c568831SAndroid Build Coastguard Worker { 8715, "ni", "contains as member, U+220B ISOtech" },
1874*7c568831SAndroid Build Coastguard Worker { 8719, "prod", "n-ary product = product sign, U+220F ISOamsb" },
1875*7c568831SAndroid Build Coastguard Worker { 8721, "sum", "n-ary summation, U+2211 ISOamsb" },
1876*7c568831SAndroid Build Coastguard Worker { 8722, "minus","minus sign, U+2212 ISOtech" },
1877*7c568831SAndroid Build Coastguard Worker { 8727, "lowast","asterisk operator, U+2217 ISOtech" },
1878*7c568831SAndroid Build Coastguard Worker { 8730, "radic","square root = radical sign, U+221A ISOtech" },
1879*7c568831SAndroid Build Coastguard Worker { 8733, "prop", "proportional to, U+221D ISOtech" },
1880*7c568831SAndroid Build Coastguard Worker { 8734, "infin","infinity, U+221E ISOtech" },
1881*7c568831SAndroid Build Coastguard Worker { 8736, "ang", "angle, U+2220 ISOamso" },
1882*7c568831SAndroid Build Coastguard Worker { 8743, "and", "logical and = wedge, U+2227 ISOtech" },
1883*7c568831SAndroid Build Coastguard Worker { 8744, "or", "logical or = vee, U+2228 ISOtech" },
1884*7c568831SAndroid Build Coastguard Worker { 8745, "cap", "intersection = cap, U+2229 ISOtech" },
1885*7c568831SAndroid Build Coastguard Worker { 8746, "cup", "union = cup, U+222A ISOtech" },
1886*7c568831SAndroid Build Coastguard Worker { 8747, "int", "integral, U+222B ISOtech" },
1887*7c568831SAndroid Build Coastguard Worker { 8756, "there4","therefore, U+2234 ISOtech" },
1888*7c568831SAndroid Build Coastguard Worker { 8764, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" },
1889*7c568831SAndroid Build Coastguard Worker { 8773, "cong", "approximately equal to, U+2245 ISOtech" },
1890*7c568831SAndroid Build Coastguard Worker { 8776, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" },
1891*7c568831SAndroid Build Coastguard Worker { 8800, "ne", "not equal to, U+2260 ISOtech" },
1892*7c568831SAndroid Build Coastguard Worker { 8801, "equiv","identical to, U+2261 ISOtech" },
1893*7c568831SAndroid Build Coastguard Worker { 8804, "le", "less-than or equal to, U+2264 ISOtech" },
1894*7c568831SAndroid Build Coastguard Worker { 8805, "ge", "greater-than or equal to, U+2265 ISOtech" },
1895*7c568831SAndroid Build Coastguard Worker { 8834, "sub", "subset of, U+2282 ISOtech" },
1896*7c568831SAndroid Build Coastguard Worker { 8835, "sup", "superset of, U+2283 ISOtech" },
1897*7c568831SAndroid Build Coastguard Worker { 8836, "nsub", "not a subset of, U+2284 ISOamsn" },
1898*7c568831SAndroid Build Coastguard Worker { 8838, "sube", "subset of or equal to, U+2286 ISOtech" },
1899*7c568831SAndroid Build Coastguard Worker { 8839, "supe", "superset of or equal to, U+2287 ISOtech" },
1900*7c568831SAndroid Build Coastguard Worker { 8853, "oplus","circled plus = direct sum, U+2295 ISOamsb" },
1901*7c568831SAndroid Build Coastguard Worker { 8855, "otimes","circled times = vector product, U+2297 ISOamsb" },
1902*7c568831SAndroid Build Coastguard Worker { 8869, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
1903*7c568831SAndroid Build Coastguard Worker { 8901, "sdot", "dot operator, U+22C5 ISOamsb" },
1904*7c568831SAndroid Build Coastguard Worker { 8968, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" },
1905*7c568831SAndroid Build Coastguard Worker { 8969, "rceil","right ceiling, U+2309 ISOamsc" },
1906*7c568831SAndroid Build Coastguard Worker { 8970, "lfloor","left floor = apl downstile, U+230A ISOamsc" },
1907*7c568831SAndroid Build Coastguard Worker { 8971, "rfloor","right floor, U+230B ISOamsc" },
1908*7c568831SAndroid Build Coastguard Worker { 9001, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" },
1909*7c568831SAndroid Build Coastguard Worker { 9002, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" },
1910*7c568831SAndroid Build Coastguard Worker { 9674, "loz", "lozenge, U+25CA ISOpub" },
1911*7c568831SAndroid Build Coastguard Worker
1912*7c568831SAndroid Build Coastguard Worker { 9824, "spades","black spade suit, U+2660 ISOpub" },
1913*7c568831SAndroid Build Coastguard Worker { 9827, "clubs","black club suit = shamrock, U+2663 ISOpub" },
1914*7c568831SAndroid Build Coastguard Worker { 9829, "hearts","black heart suit = valentine, U+2665 ISOpub" },
1915*7c568831SAndroid Build Coastguard Worker { 9830, "diams","black diamond suit, U+2666 ISOpub" },
1916*7c568831SAndroid Build Coastguard Worker
1917*7c568831SAndroid Build Coastguard Worker };
1918*7c568831SAndroid Build Coastguard Worker
1919*7c568831SAndroid Build Coastguard Worker /************************************************************************
1920*7c568831SAndroid Build Coastguard Worker * *
1921*7c568831SAndroid Build Coastguard Worker * Commodity functions to handle entities *
1922*7c568831SAndroid Build Coastguard Worker * *
1923*7c568831SAndroid Build Coastguard Worker ************************************************************************/
1924*7c568831SAndroid Build Coastguard Worker
1925*7c568831SAndroid Build Coastguard Worker /**
1926*7c568831SAndroid Build Coastguard Worker * htmlEntityLookup:
1927*7c568831SAndroid Build Coastguard Worker * @name: the entity name
1928*7c568831SAndroid Build Coastguard Worker *
1929*7c568831SAndroid Build Coastguard Worker * Lookup the given entity in EntitiesTable
1930*7c568831SAndroid Build Coastguard Worker *
1931*7c568831SAndroid Build Coastguard Worker * TODO: the linear scan is really ugly, an hash table is really needed.
1932*7c568831SAndroid Build Coastguard Worker *
1933*7c568831SAndroid Build Coastguard Worker * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
1934*7c568831SAndroid Build Coastguard Worker */
1935*7c568831SAndroid Build Coastguard Worker const htmlEntityDesc *
htmlEntityLookup(const xmlChar * name)1936*7c568831SAndroid Build Coastguard Worker htmlEntityLookup(const xmlChar *name) {
1937*7c568831SAndroid Build Coastguard Worker unsigned int i;
1938*7c568831SAndroid Build Coastguard Worker
1939*7c568831SAndroid Build Coastguard Worker for (i = 0;i < (sizeof(html40EntitiesTable)/
1940*7c568831SAndroid Build Coastguard Worker sizeof(html40EntitiesTable[0]));i++) {
1941*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) {
1942*7c568831SAndroid Build Coastguard Worker return((htmlEntityDescPtr) &html40EntitiesTable[i]);
1943*7c568831SAndroid Build Coastguard Worker }
1944*7c568831SAndroid Build Coastguard Worker }
1945*7c568831SAndroid Build Coastguard Worker return(NULL);
1946*7c568831SAndroid Build Coastguard Worker }
1947*7c568831SAndroid Build Coastguard Worker
1948*7c568831SAndroid Build Coastguard Worker static int
htmlCompareEntityDesc(const void * vkey,const void * vdesc)1949*7c568831SAndroid Build Coastguard Worker htmlCompareEntityDesc(const void *vkey, const void *vdesc) {
1950*7c568831SAndroid Build Coastguard Worker const unsigned *key = vkey;
1951*7c568831SAndroid Build Coastguard Worker const htmlEntityDesc *desc = vdesc;
1952*7c568831SAndroid Build Coastguard Worker
1953*7c568831SAndroid Build Coastguard Worker return((int) *key - (int) desc->value);
1954*7c568831SAndroid Build Coastguard Worker }
1955*7c568831SAndroid Build Coastguard Worker
1956*7c568831SAndroid Build Coastguard Worker /**
1957*7c568831SAndroid Build Coastguard Worker * htmlEntityValueLookup:
1958*7c568831SAndroid Build Coastguard Worker * @value: the entity's unicode value
1959*7c568831SAndroid Build Coastguard Worker *
1960*7c568831SAndroid Build Coastguard Worker * Lookup the given entity in EntitiesTable
1961*7c568831SAndroid Build Coastguard Worker *
1962*7c568831SAndroid Build Coastguard Worker * TODO: the linear scan is really ugly, an hash table is really needed.
1963*7c568831SAndroid Build Coastguard Worker *
1964*7c568831SAndroid Build Coastguard Worker * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
1965*7c568831SAndroid Build Coastguard Worker */
1966*7c568831SAndroid Build Coastguard Worker const htmlEntityDesc *
htmlEntityValueLookup(unsigned int value)1967*7c568831SAndroid Build Coastguard Worker htmlEntityValueLookup(unsigned int value) {
1968*7c568831SAndroid Build Coastguard Worker const htmlEntityDesc *desc;
1969*7c568831SAndroid Build Coastguard Worker size_t nmemb;
1970*7c568831SAndroid Build Coastguard Worker
1971*7c568831SAndroid Build Coastguard Worker nmemb = sizeof(html40EntitiesTable) / sizeof(html40EntitiesTable[0]);
1972*7c568831SAndroid Build Coastguard Worker desc = bsearch(&value, html40EntitiesTable, nmemb, sizeof(htmlEntityDesc),
1973*7c568831SAndroid Build Coastguard Worker htmlCompareEntityDesc);
1974*7c568831SAndroid Build Coastguard Worker
1975*7c568831SAndroid Build Coastguard Worker return(desc);
1976*7c568831SAndroid Build Coastguard Worker }
1977*7c568831SAndroid Build Coastguard Worker
1978*7c568831SAndroid Build Coastguard Worker /**
1979*7c568831SAndroid Build Coastguard Worker * UTF8ToHtml:
1980*7c568831SAndroid Build Coastguard Worker * @out: a pointer to an array of bytes to store the result
1981*7c568831SAndroid Build Coastguard Worker * @outlen: the length of @out
1982*7c568831SAndroid Build Coastguard Worker * @in: a pointer to an array of UTF-8 chars
1983*7c568831SAndroid Build Coastguard Worker * @inlen: the length of @in
1984*7c568831SAndroid Build Coastguard Worker *
1985*7c568831SAndroid Build Coastguard Worker * Take a block of UTF-8 chars in and try to convert it to an ASCII
1986*7c568831SAndroid Build Coastguard Worker * plus HTML entities block of chars out.
1987*7c568831SAndroid Build Coastguard Worker *
1988*7c568831SAndroid Build Coastguard Worker * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
1989*7c568831SAndroid Build Coastguard Worker * The value of @inlen after return is the number of octets consumed
1990*7c568831SAndroid Build Coastguard Worker * as the return value is positive, else unpredictable.
1991*7c568831SAndroid Build Coastguard Worker * The value of @outlen after return is the number of octets consumed.
1992*7c568831SAndroid Build Coastguard Worker */
1993*7c568831SAndroid Build Coastguard Worker int
UTF8ToHtml(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1994*7c568831SAndroid Build Coastguard Worker UTF8ToHtml(unsigned char* out, int *outlen,
1995*7c568831SAndroid Build Coastguard Worker const unsigned char* in, int *inlen) {
1996*7c568831SAndroid Build Coastguard Worker const unsigned char* instart = in;
1997*7c568831SAndroid Build Coastguard Worker const unsigned char* inend;
1998*7c568831SAndroid Build Coastguard Worker unsigned char* outstart = out;
1999*7c568831SAndroid Build Coastguard Worker unsigned char* outend;
2000*7c568831SAndroid Build Coastguard Worker int ret = XML_ENC_ERR_SPACE;
2001*7c568831SAndroid Build Coastguard Worker
2002*7c568831SAndroid Build Coastguard Worker if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
2003*7c568831SAndroid Build Coastguard Worker return(XML_ENC_ERR_INTERNAL);
2004*7c568831SAndroid Build Coastguard Worker
2005*7c568831SAndroid Build Coastguard Worker if (in == NULL) {
2006*7c568831SAndroid Build Coastguard Worker /*
2007*7c568831SAndroid Build Coastguard Worker * initialization nothing to do
2008*7c568831SAndroid Build Coastguard Worker */
2009*7c568831SAndroid Build Coastguard Worker *outlen = 0;
2010*7c568831SAndroid Build Coastguard Worker *inlen = 0;
2011*7c568831SAndroid Build Coastguard Worker return(XML_ENC_ERR_SUCCESS);
2012*7c568831SAndroid Build Coastguard Worker }
2013*7c568831SAndroid Build Coastguard Worker
2014*7c568831SAndroid Build Coastguard Worker inend = in + *inlen;
2015*7c568831SAndroid Build Coastguard Worker outend = out + *outlen;
2016*7c568831SAndroid Build Coastguard Worker while (in < inend) {
2017*7c568831SAndroid Build Coastguard Worker const htmlEntityDesc *ent;
2018*7c568831SAndroid Build Coastguard Worker const char *cp;
2019*7c568831SAndroid Build Coastguard Worker char nbuf[16];
2020*7c568831SAndroid Build Coastguard Worker unsigned c, d;
2021*7c568831SAndroid Build Coastguard Worker int seqlen, len, i;
2022*7c568831SAndroid Build Coastguard Worker
2023*7c568831SAndroid Build Coastguard Worker d = *in;
2024*7c568831SAndroid Build Coastguard Worker
2025*7c568831SAndroid Build Coastguard Worker if (d < 0x80) {
2026*7c568831SAndroid Build Coastguard Worker if (out >= outend)
2027*7c568831SAndroid Build Coastguard Worker goto done;
2028*7c568831SAndroid Build Coastguard Worker *out++ = d;
2029*7c568831SAndroid Build Coastguard Worker in += 1;
2030*7c568831SAndroid Build Coastguard Worker continue;
2031*7c568831SAndroid Build Coastguard Worker }
2032*7c568831SAndroid Build Coastguard Worker
2033*7c568831SAndroid Build Coastguard Worker if (d < 0xE0) { c = d & 0x1F; seqlen = 2; }
2034*7c568831SAndroid Build Coastguard Worker else if (d < 0xF0) { c = d & 0x0F; seqlen = 3; }
2035*7c568831SAndroid Build Coastguard Worker else { c = d & 0x07; seqlen = 4; }
2036*7c568831SAndroid Build Coastguard Worker
2037*7c568831SAndroid Build Coastguard Worker if (inend - in < seqlen)
2038*7c568831SAndroid Build Coastguard Worker break;
2039*7c568831SAndroid Build Coastguard Worker
2040*7c568831SAndroid Build Coastguard Worker for (i = 1; i < seqlen; i++) {
2041*7c568831SAndroid Build Coastguard Worker d = in[i];
2042*7c568831SAndroid Build Coastguard Worker c <<= 6;
2043*7c568831SAndroid Build Coastguard Worker c |= d & 0x3F;
2044*7c568831SAndroid Build Coastguard Worker }
2045*7c568831SAndroid Build Coastguard Worker
2046*7c568831SAndroid Build Coastguard Worker /*
2047*7c568831SAndroid Build Coastguard Worker * Try to lookup a predefined HTML entity for it
2048*7c568831SAndroid Build Coastguard Worker */
2049*7c568831SAndroid Build Coastguard Worker ent = htmlEntityValueLookup(c);
2050*7c568831SAndroid Build Coastguard Worker
2051*7c568831SAndroid Build Coastguard Worker if (ent == NULL) {
2052*7c568831SAndroid Build Coastguard Worker snprintf(nbuf, sizeof(nbuf), "#%u", c);
2053*7c568831SAndroid Build Coastguard Worker cp = nbuf;
2054*7c568831SAndroid Build Coastguard Worker } else {
2055*7c568831SAndroid Build Coastguard Worker cp = ent->name;
2056*7c568831SAndroid Build Coastguard Worker }
2057*7c568831SAndroid Build Coastguard Worker
2058*7c568831SAndroid Build Coastguard Worker len = strlen(cp);
2059*7c568831SAndroid Build Coastguard Worker if (outend - out < len + 2)
2060*7c568831SAndroid Build Coastguard Worker goto done;
2061*7c568831SAndroid Build Coastguard Worker
2062*7c568831SAndroid Build Coastguard Worker *out++ = '&';
2063*7c568831SAndroid Build Coastguard Worker memcpy(out, cp, len);
2064*7c568831SAndroid Build Coastguard Worker out += len;
2065*7c568831SAndroid Build Coastguard Worker *out++ = ';';
2066*7c568831SAndroid Build Coastguard Worker
2067*7c568831SAndroid Build Coastguard Worker in += seqlen;
2068*7c568831SAndroid Build Coastguard Worker }
2069*7c568831SAndroid Build Coastguard Worker
2070*7c568831SAndroid Build Coastguard Worker ret = out - outstart;
2071*7c568831SAndroid Build Coastguard Worker
2072*7c568831SAndroid Build Coastguard Worker done:
2073*7c568831SAndroid Build Coastguard Worker *outlen = out - outstart;
2074*7c568831SAndroid Build Coastguard Worker *inlen = in - instart;
2075*7c568831SAndroid Build Coastguard Worker return(ret);
2076*7c568831SAndroid Build Coastguard Worker }
2077*7c568831SAndroid Build Coastguard Worker
2078*7c568831SAndroid Build Coastguard Worker /**
2079*7c568831SAndroid Build Coastguard Worker * htmlEncodeEntities:
2080*7c568831SAndroid Build Coastguard Worker * @out: a pointer to an array of bytes to store the result
2081*7c568831SAndroid Build Coastguard Worker * @outlen: the length of @out
2082*7c568831SAndroid Build Coastguard Worker * @in: a pointer to an array of UTF-8 chars
2083*7c568831SAndroid Build Coastguard Worker * @inlen: the length of @in
2084*7c568831SAndroid Build Coastguard Worker * @quoteChar: the quote character to escape (' or ") or zero.
2085*7c568831SAndroid Build Coastguard Worker *
2086*7c568831SAndroid Build Coastguard Worker * Take a block of UTF-8 chars in and try to convert it to an ASCII
2087*7c568831SAndroid Build Coastguard Worker * plus HTML entities block of chars out.
2088*7c568831SAndroid Build Coastguard Worker *
2089*7c568831SAndroid Build Coastguard Worker * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2090*7c568831SAndroid Build Coastguard Worker * The value of @inlen after return is the number of octets consumed
2091*7c568831SAndroid Build Coastguard Worker * as the return value is positive, else unpredictable.
2092*7c568831SAndroid Build Coastguard Worker * The value of @outlen after return is the number of octets consumed.
2093*7c568831SAndroid Build Coastguard Worker */
2094*7c568831SAndroid Build Coastguard Worker int
htmlEncodeEntities(unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int quoteChar)2095*7c568831SAndroid Build Coastguard Worker htmlEncodeEntities(unsigned char* out, int *outlen,
2096*7c568831SAndroid Build Coastguard Worker const unsigned char* in, int *inlen, int quoteChar) {
2097*7c568831SAndroid Build Coastguard Worker const unsigned char* processed = in;
2098*7c568831SAndroid Build Coastguard Worker const unsigned char* outend;
2099*7c568831SAndroid Build Coastguard Worker const unsigned char* outstart = out;
2100*7c568831SAndroid Build Coastguard Worker const unsigned char* instart = in;
2101*7c568831SAndroid Build Coastguard Worker const unsigned char* inend;
2102*7c568831SAndroid Build Coastguard Worker unsigned int c, d;
2103*7c568831SAndroid Build Coastguard Worker int trailing;
2104*7c568831SAndroid Build Coastguard Worker
2105*7c568831SAndroid Build Coastguard Worker if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL))
2106*7c568831SAndroid Build Coastguard Worker return(-1);
2107*7c568831SAndroid Build Coastguard Worker outend = out + (*outlen);
2108*7c568831SAndroid Build Coastguard Worker inend = in + (*inlen);
2109*7c568831SAndroid Build Coastguard Worker while (in < inend) {
2110*7c568831SAndroid Build Coastguard Worker d = *in++;
2111*7c568831SAndroid Build Coastguard Worker if (d < 0x80) { c= d; trailing= 0; }
2112*7c568831SAndroid Build Coastguard Worker else if (d < 0xC0) {
2113*7c568831SAndroid Build Coastguard Worker /* trailing byte in leading position */
2114*7c568831SAndroid Build Coastguard Worker *outlen = out - outstart;
2115*7c568831SAndroid Build Coastguard Worker *inlen = processed - instart;
2116*7c568831SAndroid Build Coastguard Worker return(-2);
2117*7c568831SAndroid Build Coastguard Worker } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
2118*7c568831SAndroid Build Coastguard Worker else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
2119*7c568831SAndroid Build Coastguard Worker else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
2120*7c568831SAndroid Build Coastguard Worker else {
2121*7c568831SAndroid Build Coastguard Worker /* no chance for this in Ascii */
2122*7c568831SAndroid Build Coastguard Worker *outlen = out - outstart;
2123*7c568831SAndroid Build Coastguard Worker *inlen = processed - instart;
2124*7c568831SAndroid Build Coastguard Worker return(-2);
2125*7c568831SAndroid Build Coastguard Worker }
2126*7c568831SAndroid Build Coastguard Worker
2127*7c568831SAndroid Build Coastguard Worker if (inend - in < trailing)
2128*7c568831SAndroid Build Coastguard Worker break;
2129*7c568831SAndroid Build Coastguard Worker
2130*7c568831SAndroid Build Coastguard Worker while (trailing--) {
2131*7c568831SAndroid Build Coastguard Worker if (((d= *in++) & 0xC0) != 0x80) {
2132*7c568831SAndroid Build Coastguard Worker *outlen = out - outstart;
2133*7c568831SAndroid Build Coastguard Worker *inlen = processed - instart;
2134*7c568831SAndroid Build Coastguard Worker return(-2);
2135*7c568831SAndroid Build Coastguard Worker }
2136*7c568831SAndroid Build Coastguard Worker c <<= 6;
2137*7c568831SAndroid Build Coastguard Worker c |= d & 0x3F;
2138*7c568831SAndroid Build Coastguard Worker }
2139*7c568831SAndroid Build Coastguard Worker
2140*7c568831SAndroid Build Coastguard Worker /* assertion: c is a single UTF-4 value */
2141*7c568831SAndroid Build Coastguard Worker if ((c < 0x80) && (c != (unsigned int) quoteChar) &&
2142*7c568831SAndroid Build Coastguard Worker (c != '&') && (c != '<') && (c != '>')) {
2143*7c568831SAndroid Build Coastguard Worker if (out >= outend)
2144*7c568831SAndroid Build Coastguard Worker break;
2145*7c568831SAndroid Build Coastguard Worker *out++ = c;
2146*7c568831SAndroid Build Coastguard Worker } else {
2147*7c568831SAndroid Build Coastguard Worker const htmlEntityDesc * ent;
2148*7c568831SAndroid Build Coastguard Worker const char *cp;
2149*7c568831SAndroid Build Coastguard Worker char nbuf[16];
2150*7c568831SAndroid Build Coastguard Worker int len;
2151*7c568831SAndroid Build Coastguard Worker
2152*7c568831SAndroid Build Coastguard Worker /*
2153*7c568831SAndroid Build Coastguard Worker * Try to lookup a predefined HTML entity for it
2154*7c568831SAndroid Build Coastguard Worker */
2155*7c568831SAndroid Build Coastguard Worker ent = htmlEntityValueLookup(c);
2156*7c568831SAndroid Build Coastguard Worker if (ent == NULL) {
2157*7c568831SAndroid Build Coastguard Worker snprintf(nbuf, sizeof(nbuf), "#%u", c);
2158*7c568831SAndroid Build Coastguard Worker cp = nbuf;
2159*7c568831SAndroid Build Coastguard Worker }
2160*7c568831SAndroid Build Coastguard Worker else
2161*7c568831SAndroid Build Coastguard Worker cp = ent->name;
2162*7c568831SAndroid Build Coastguard Worker len = strlen(cp);
2163*7c568831SAndroid Build Coastguard Worker if (outend - out < len + 2)
2164*7c568831SAndroid Build Coastguard Worker break;
2165*7c568831SAndroid Build Coastguard Worker *out++ = '&';
2166*7c568831SAndroid Build Coastguard Worker memcpy(out, cp, len);
2167*7c568831SAndroid Build Coastguard Worker out += len;
2168*7c568831SAndroid Build Coastguard Worker *out++ = ';';
2169*7c568831SAndroid Build Coastguard Worker }
2170*7c568831SAndroid Build Coastguard Worker processed = in;
2171*7c568831SAndroid Build Coastguard Worker }
2172*7c568831SAndroid Build Coastguard Worker *outlen = out - outstart;
2173*7c568831SAndroid Build Coastguard Worker *inlen = processed - instart;
2174*7c568831SAndroid Build Coastguard Worker return(0);
2175*7c568831SAndroid Build Coastguard Worker }
2176*7c568831SAndroid Build Coastguard Worker
2177*7c568831SAndroid Build Coastguard Worker /************************************************************************
2178*7c568831SAndroid Build Coastguard Worker * *
2179*7c568831SAndroid Build Coastguard Worker * Commodity functions, cleanup needed ? *
2180*7c568831SAndroid Build Coastguard Worker * *
2181*7c568831SAndroid Build Coastguard Worker ************************************************************************/
/*
 * Names of all the elements which may contain PCDATA according to the
 * HTML 4.01 loose DTD.
 *
 * NOTE: this information would arguably fit better inside the
 * html40ElementTable array; it is kept as a separate list to avoid
 * any risk of binary incompatibility.
 */
static const char *allowPCData[] = {
    "a",
    "abbr", "acronym", "address", "applet",
    "b",
    "bdo", "big", "blockquote", "body", "button",
    "caption", "center", "cite", "code",
    "dd", "del", "dfn", "div", "dt",
    "em",
    "font", "form",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "i",
    "iframe", "ins",
    "kbd",
    "label", "legend", "li",
    "noframes", "noscript",
    "object",
    "p",
    "pre",
    "q",
    "s",
    "samp", "small", "span", "strike", "strong",
    "td", "th", "tt",
    "u",
    "var"
};
2196*7c568831SAndroid Build Coastguard Worker
2197*7c568831SAndroid Build Coastguard Worker /**
2198*7c568831SAndroid Build Coastguard Worker * areBlanks:
2199*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
2200*7c568831SAndroid Build Coastguard Worker * @str: a xmlChar *
2201*7c568831SAndroid Build Coastguard Worker * @len: the size of @str
2202*7c568831SAndroid Build Coastguard Worker *
2203*7c568831SAndroid Build Coastguard Worker * Is this a sequence of blank chars that one can ignore ?
2204*7c568831SAndroid Build Coastguard Worker *
2205*7c568831SAndroid Build Coastguard Worker * Returns 1 if ignorable 0 if whitespace, -1 otherwise.
2206*7c568831SAndroid Build Coastguard Worker */
2207*7c568831SAndroid Build Coastguard Worker
areBlanks(htmlParserCtxtPtr ctxt,const xmlChar * str,int len)2208*7c568831SAndroid Build Coastguard Worker static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2209*7c568831SAndroid Build Coastguard Worker unsigned int i;
2210*7c568831SAndroid Build Coastguard Worker int j;
2211*7c568831SAndroid Build Coastguard Worker xmlNodePtr lastChild;
2212*7c568831SAndroid Build Coastguard Worker xmlDtdPtr dtd;
2213*7c568831SAndroid Build Coastguard Worker
2214*7c568831SAndroid Build Coastguard Worker for (j = 0;j < len;j++)
2215*7c568831SAndroid Build Coastguard Worker if (!(IS_WS_HTML(str[j]))) return(-1);
2216*7c568831SAndroid Build Coastguard Worker
2217*7c568831SAndroid Build Coastguard Worker if (CUR == 0) return(1);
2218*7c568831SAndroid Build Coastguard Worker if (CUR != '<') return(0);
2219*7c568831SAndroid Build Coastguard Worker if (ctxt->name == NULL)
2220*7c568831SAndroid Build Coastguard Worker return(1);
2221*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
2222*7c568831SAndroid Build Coastguard Worker return(1);
2223*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
2224*7c568831SAndroid Build Coastguard Worker return(1);
2225*7c568831SAndroid Build Coastguard Worker
2226*7c568831SAndroid Build Coastguard Worker /* Only strip CDATA children of the body tag for strict HTML DTDs */
2227*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
2228*7c568831SAndroid Build Coastguard Worker dtd = xmlGetIntSubset(ctxt->myDoc);
2229*7c568831SAndroid Build Coastguard Worker if (dtd != NULL && dtd->ExternalID != NULL) {
2230*7c568831SAndroid Build Coastguard Worker if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") ||
2231*7c568831SAndroid Build Coastguard Worker !xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN"))
2232*7c568831SAndroid Build Coastguard Worker return(1);
2233*7c568831SAndroid Build Coastguard Worker }
2234*7c568831SAndroid Build Coastguard Worker }
2235*7c568831SAndroid Build Coastguard Worker
2236*7c568831SAndroid Build Coastguard Worker if (ctxt->node == NULL) return(0);
2237*7c568831SAndroid Build Coastguard Worker lastChild = xmlGetLastChild(ctxt->node);
2238*7c568831SAndroid Build Coastguard Worker while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))
2239*7c568831SAndroid Build Coastguard Worker lastChild = lastChild->prev;
2240*7c568831SAndroid Build Coastguard Worker if (lastChild == NULL) {
2241*7c568831SAndroid Build Coastguard Worker if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2242*7c568831SAndroid Build Coastguard Worker (ctxt->node->content != NULL)) return(0);
2243*7c568831SAndroid Build Coastguard Worker /* keep ws in constructs like ...<b> </b>...
2244*7c568831SAndroid Build Coastguard Worker for all tags "b" allowing PCDATA */
2245*7c568831SAndroid Build Coastguard Worker for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
2246*7c568831SAndroid Build Coastguard Worker if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
2247*7c568831SAndroid Build Coastguard Worker return(0);
2248*7c568831SAndroid Build Coastguard Worker }
2249*7c568831SAndroid Build Coastguard Worker }
2250*7c568831SAndroid Build Coastguard Worker } else if (xmlNodeIsText(lastChild)) {
2251*7c568831SAndroid Build Coastguard Worker return(0);
2252*7c568831SAndroid Build Coastguard Worker } else {
2253*7c568831SAndroid Build Coastguard Worker /* keep ws in constructs like <p><b>xy</b> <i>z</i><p>
2254*7c568831SAndroid Build Coastguard Worker for all tags "p" allowing PCDATA */
2255*7c568831SAndroid Build Coastguard Worker for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
2256*7c568831SAndroid Build Coastguard Worker if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) {
2257*7c568831SAndroid Build Coastguard Worker return(0);
2258*7c568831SAndroid Build Coastguard Worker }
2259*7c568831SAndroid Build Coastguard Worker }
2260*7c568831SAndroid Build Coastguard Worker }
2261*7c568831SAndroid Build Coastguard Worker return(1);
2262*7c568831SAndroid Build Coastguard Worker }
2263*7c568831SAndroid Build Coastguard Worker
2264*7c568831SAndroid Build Coastguard Worker /**
2265*7c568831SAndroid Build Coastguard Worker * htmlNewDocNoDtD:
2266*7c568831SAndroid Build Coastguard Worker * @URI: URI for the dtd, or NULL
2267*7c568831SAndroid Build Coastguard Worker * @ExternalID: the external ID of the DTD, or NULL
2268*7c568831SAndroid Build Coastguard Worker *
2269*7c568831SAndroid Build Coastguard Worker * Creates a new HTML document without a DTD node if @URI and @ExternalID
2270*7c568831SAndroid Build Coastguard Worker * are NULL
2271*7c568831SAndroid Build Coastguard Worker *
2272*7c568831SAndroid Build Coastguard Worker * Returns a new document, do not initialize the DTD if not provided
2273*7c568831SAndroid Build Coastguard Worker */
2274*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlNewDocNoDtD(const xmlChar * URI,const xmlChar * ExternalID)2275*7c568831SAndroid Build Coastguard Worker htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
2276*7c568831SAndroid Build Coastguard Worker xmlDocPtr cur;
2277*7c568831SAndroid Build Coastguard Worker
2278*7c568831SAndroid Build Coastguard Worker /*
2279*7c568831SAndroid Build Coastguard Worker * Allocate a new document and fill the fields.
2280*7c568831SAndroid Build Coastguard Worker */
2281*7c568831SAndroid Build Coastguard Worker cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
2282*7c568831SAndroid Build Coastguard Worker if (cur == NULL)
2283*7c568831SAndroid Build Coastguard Worker return(NULL);
2284*7c568831SAndroid Build Coastguard Worker memset(cur, 0, sizeof(xmlDoc));
2285*7c568831SAndroid Build Coastguard Worker
2286*7c568831SAndroid Build Coastguard Worker cur->type = XML_HTML_DOCUMENT_NODE;
2287*7c568831SAndroid Build Coastguard Worker cur->version = NULL;
2288*7c568831SAndroid Build Coastguard Worker cur->intSubset = NULL;
2289*7c568831SAndroid Build Coastguard Worker cur->doc = cur;
2290*7c568831SAndroid Build Coastguard Worker cur->name = NULL;
2291*7c568831SAndroid Build Coastguard Worker cur->children = NULL;
2292*7c568831SAndroid Build Coastguard Worker cur->extSubset = NULL;
2293*7c568831SAndroid Build Coastguard Worker cur->oldNs = NULL;
2294*7c568831SAndroid Build Coastguard Worker cur->encoding = NULL;
2295*7c568831SAndroid Build Coastguard Worker cur->standalone = 1;
2296*7c568831SAndroid Build Coastguard Worker cur->compression = 0;
2297*7c568831SAndroid Build Coastguard Worker cur->ids = NULL;
2298*7c568831SAndroid Build Coastguard Worker cur->refs = NULL;
2299*7c568831SAndroid Build Coastguard Worker cur->_private = NULL;
2300*7c568831SAndroid Build Coastguard Worker cur->charset = XML_CHAR_ENCODING_UTF8;
2301*7c568831SAndroid Build Coastguard Worker cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT;
2302*7c568831SAndroid Build Coastguard Worker if ((ExternalID != NULL) ||
2303*7c568831SAndroid Build Coastguard Worker (URI != NULL)) {
2304*7c568831SAndroid Build Coastguard Worker xmlDtdPtr intSubset;
2305*7c568831SAndroid Build Coastguard Worker
2306*7c568831SAndroid Build Coastguard Worker intSubset = xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);
2307*7c568831SAndroid Build Coastguard Worker if (intSubset == NULL) {
2308*7c568831SAndroid Build Coastguard Worker xmlFree(cur);
2309*7c568831SAndroid Build Coastguard Worker return(NULL);
2310*7c568831SAndroid Build Coastguard Worker }
2311*7c568831SAndroid Build Coastguard Worker }
2312*7c568831SAndroid Build Coastguard Worker if ((xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
2313*7c568831SAndroid Build Coastguard Worker xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
2314*7c568831SAndroid Build Coastguard Worker return(cur);
2315*7c568831SAndroid Build Coastguard Worker }
2316*7c568831SAndroid Build Coastguard Worker
2317*7c568831SAndroid Build Coastguard Worker /**
2318*7c568831SAndroid Build Coastguard Worker * htmlNewDoc:
2319*7c568831SAndroid Build Coastguard Worker * @URI: URI for the dtd, or NULL
2320*7c568831SAndroid Build Coastguard Worker * @ExternalID: the external ID of the DTD, or NULL
2321*7c568831SAndroid Build Coastguard Worker *
2322*7c568831SAndroid Build Coastguard Worker * Creates a new HTML document
2323*7c568831SAndroid Build Coastguard Worker *
2324*7c568831SAndroid Build Coastguard Worker * Returns a new document
2325*7c568831SAndroid Build Coastguard Worker */
2326*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlNewDoc(const xmlChar * URI,const xmlChar * ExternalID)2327*7c568831SAndroid Build Coastguard Worker htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
2328*7c568831SAndroid Build Coastguard Worker if ((URI == NULL) && (ExternalID == NULL))
2329*7c568831SAndroid Build Coastguard Worker return(htmlNewDocNoDtD(
2330*7c568831SAndroid Build Coastguard Worker BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
2331*7c568831SAndroid Build Coastguard Worker BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
2332*7c568831SAndroid Build Coastguard Worker
2333*7c568831SAndroid Build Coastguard Worker return(htmlNewDocNoDtD(URI, ExternalID));
2334*7c568831SAndroid Build Coastguard Worker }
2335*7c568831SAndroid Build Coastguard Worker
2336*7c568831SAndroid Build Coastguard Worker
/************************************************************************
 *									*
 *			The parser itself				*
 *	Relates to http://www.w3.org/TR/html40				*
 *									*
 ************************************************************************/
2349*7c568831SAndroid Build Coastguard Worker
2350*7c568831SAndroid Build Coastguard Worker /**
2351*7c568831SAndroid Build Coastguard Worker * htmlParseHTMLName:
2352*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
2353*7c568831SAndroid Build Coastguard Worker *
2354*7c568831SAndroid Build Coastguard Worker * parse an HTML tag or attribute name, note that we convert it to lowercase
2355*7c568831SAndroid Build Coastguard Worker * since HTML names are not case-sensitive.
2356*7c568831SAndroid Build Coastguard Worker *
2357*7c568831SAndroid Build Coastguard Worker * Returns the Tag Name parsed or NULL
2358*7c568831SAndroid Build Coastguard Worker */
2359*7c568831SAndroid Build Coastguard Worker
2360*7c568831SAndroid Build Coastguard Worker static xmlHashedString
htmlParseHTMLName(htmlParserCtxtPtr ctxt,int attr)2361*7c568831SAndroid Build Coastguard Worker htmlParseHTMLName(htmlParserCtxtPtr ctxt, int attr) {
2362*7c568831SAndroid Build Coastguard Worker xmlHashedString ret;
2363*7c568831SAndroid Build Coastguard Worker xmlChar buf[HTML_PARSER_BUFFER_SIZE];
2364*7c568831SAndroid Build Coastguard Worker const xmlChar *in;
2365*7c568831SAndroid Build Coastguard Worker size_t avail;
2366*7c568831SAndroid Build Coastguard Worker int eof = PARSER_PROGRESSIVE(ctxt);
2367*7c568831SAndroid Build Coastguard Worker int nbchar = 0;
2368*7c568831SAndroid Build Coastguard Worker int stop = attr ? '=' : ' ';
2369*7c568831SAndroid Build Coastguard Worker
2370*7c568831SAndroid Build Coastguard Worker in = ctxt->input->cur;
2371*7c568831SAndroid Build Coastguard Worker avail = ctxt->input->end - in;
2372*7c568831SAndroid Build Coastguard Worker
2373*7c568831SAndroid Build Coastguard Worker while (1) {
2374*7c568831SAndroid Build Coastguard Worker int c, size;
2375*7c568831SAndroid Build Coastguard Worker
2376*7c568831SAndroid Build Coastguard Worker if ((!eof) && (avail < 32)) {
2377*7c568831SAndroid Build Coastguard Worker size_t oldAvail = avail;
2378*7c568831SAndroid Build Coastguard Worker
2379*7c568831SAndroid Build Coastguard Worker ctxt->input->cur = in;
2380*7c568831SAndroid Build Coastguard Worker
2381*7c568831SAndroid Build Coastguard Worker SHRINK;
2382*7c568831SAndroid Build Coastguard Worker xmlParserGrow(ctxt);
2383*7c568831SAndroid Build Coastguard Worker
2384*7c568831SAndroid Build Coastguard Worker in = ctxt->input->cur;
2385*7c568831SAndroid Build Coastguard Worker avail = ctxt->input->end - in;
2386*7c568831SAndroid Build Coastguard Worker
2387*7c568831SAndroid Build Coastguard Worker if (oldAvail == avail)
2388*7c568831SAndroid Build Coastguard Worker eof = 1;
2389*7c568831SAndroid Build Coastguard Worker }
2390*7c568831SAndroid Build Coastguard Worker
2391*7c568831SAndroid Build Coastguard Worker if (avail == 0)
2392*7c568831SAndroid Build Coastguard Worker break;
2393*7c568831SAndroid Build Coastguard Worker
2394*7c568831SAndroid Build Coastguard Worker c = *in;
2395*7c568831SAndroid Build Coastguard Worker size = 1;
2396*7c568831SAndroid Build Coastguard Worker
2397*7c568831SAndroid Build Coastguard Worker if ((nbchar != 0) &&
2398*7c568831SAndroid Build Coastguard Worker ((c == '/') || (c == '>') || (c == stop) ||
2399*7c568831SAndroid Build Coastguard Worker (IS_WS_HTML(c))))
2400*7c568831SAndroid Build Coastguard Worker break;
2401*7c568831SAndroid Build Coastguard Worker
2402*7c568831SAndroid Build Coastguard Worker if (c == 0) {
2403*7c568831SAndroid Build Coastguard Worker if (nbchar + 3 <= HTML_PARSER_BUFFER_SIZE) {
2404*7c568831SAndroid Build Coastguard Worker buf[nbchar++] = 0xEF;
2405*7c568831SAndroid Build Coastguard Worker buf[nbchar++] = 0xBF;
2406*7c568831SAndroid Build Coastguard Worker buf[nbchar++] = 0xBD;
2407*7c568831SAndroid Build Coastguard Worker }
2408*7c568831SAndroid Build Coastguard Worker } else if (c < 0x80) {
2409*7c568831SAndroid Build Coastguard Worker if (nbchar < HTML_PARSER_BUFFER_SIZE) {
2410*7c568831SAndroid Build Coastguard Worker if (IS_UPPER(c))
2411*7c568831SAndroid Build Coastguard Worker c += 0x20;
2412*7c568831SAndroid Build Coastguard Worker buf[nbchar++] = c;
2413*7c568831SAndroid Build Coastguard Worker }
2414*7c568831SAndroid Build Coastguard Worker } else {
2415*7c568831SAndroid Build Coastguard Worker size = htmlValidateUtf8(ctxt, in, avail);
2416*7c568831SAndroid Build Coastguard Worker
2417*7c568831SAndroid Build Coastguard Worker if (size > 0) {
2418*7c568831SAndroid Build Coastguard Worker if (nbchar + size <= HTML_PARSER_BUFFER_SIZE) {
2419*7c568831SAndroid Build Coastguard Worker memcpy(buf + nbchar, in, size);
2420*7c568831SAndroid Build Coastguard Worker nbchar += size;
2421*7c568831SAndroid Build Coastguard Worker }
2422*7c568831SAndroid Build Coastguard Worker } else {
2423*7c568831SAndroid Build Coastguard Worker size = 1;
2424*7c568831SAndroid Build Coastguard Worker
2425*7c568831SAndroid Build Coastguard Worker if (nbchar + 3 <= HTML_PARSER_BUFFER_SIZE) {
2426*7c568831SAndroid Build Coastguard Worker buf[nbchar++] = 0xEF;
2427*7c568831SAndroid Build Coastguard Worker buf[nbchar++] = 0xBF;
2428*7c568831SAndroid Build Coastguard Worker buf[nbchar++] = 0xBD;
2429*7c568831SAndroid Build Coastguard Worker }
2430*7c568831SAndroid Build Coastguard Worker }
2431*7c568831SAndroid Build Coastguard Worker }
2432*7c568831SAndroid Build Coastguard Worker
2433*7c568831SAndroid Build Coastguard Worker in += size;
2434*7c568831SAndroid Build Coastguard Worker avail -= size;
2435*7c568831SAndroid Build Coastguard Worker }
2436*7c568831SAndroid Build Coastguard Worker
2437*7c568831SAndroid Build Coastguard Worker ctxt->input->cur = in;
2438*7c568831SAndroid Build Coastguard Worker
2439*7c568831SAndroid Build Coastguard Worker SHRINK;
2440*7c568831SAndroid Build Coastguard Worker
2441*7c568831SAndroid Build Coastguard Worker ret = xmlDictLookupHashed(ctxt->dict, buf, nbchar);
2442*7c568831SAndroid Build Coastguard Worker if (ret.name == NULL)
2443*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
2444*7c568831SAndroid Build Coastguard Worker
2445*7c568831SAndroid Build Coastguard Worker return(ret);
2446*7c568831SAndroid Build Coastguard Worker }
2447*7c568831SAndroid Build Coastguard Worker
/*
 * Replacement code points for numeric character references in the
 * range 0x80..0x9F, indexed by (code point - 0x80).
 */
static const short htmlC1Remap[32] = {
    /* 0x80 */ 0x20AC, 0x0081, 0x201A, 0x0192,
    /* 0x84 */ 0x201E, 0x2026, 0x2020, 0x2021,
    /* 0x88 */ 0x02C6, 0x2030, 0x0160, 0x2039,
    /* 0x8C */ 0x0152, 0x008D, 0x017D, 0x008F,
    /* 0x90 */ 0x0090, 0x2018, 0x2019, 0x201C,
    /* 0x94 */ 0x201D, 0x2022, 0x2013, 0x2014,
    /* 0x98 */ 0x02DC, 0x2122, 0x0161, 0x203A,
    /* 0x9C */ 0x0153, 0x009D, 0x017E, 0x0178
};
2454*7c568831SAndroid Build Coastguard Worker
2455*7c568831SAndroid Build Coastguard Worker static const xmlChar *
htmlCodePointToUtf8(int c,xmlChar * out,int * osize)2456*7c568831SAndroid Build Coastguard Worker htmlCodePointToUtf8(int c, xmlChar *out, int *osize) {
2457*7c568831SAndroid Build Coastguard Worker int i = 0;
2458*7c568831SAndroid Build Coastguard Worker int bits, hi;
2459*7c568831SAndroid Build Coastguard Worker
2460*7c568831SAndroid Build Coastguard Worker if ((c >= 0x80) && (c < 0xA0)) {
2461*7c568831SAndroid Build Coastguard Worker c = htmlC1Remap[c - 0x80];
2462*7c568831SAndroid Build Coastguard Worker } else if ((c <= 0) ||
2463*7c568831SAndroid Build Coastguard Worker ((c >= 0xD800) && (c < 0xE000)) ||
2464*7c568831SAndroid Build Coastguard Worker (c > 0x10FFFF)) {
2465*7c568831SAndroid Build Coastguard Worker c = 0xFFFD;
2466*7c568831SAndroid Build Coastguard Worker }
2467*7c568831SAndroid Build Coastguard Worker
2468*7c568831SAndroid Build Coastguard Worker if (c < 0x80) { bits = 0; hi = 0x00; }
2469*7c568831SAndroid Build Coastguard Worker else if (c < 0x800) { bits = 6; hi = 0xC0; }
2470*7c568831SAndroid Build Coastguard Worker else if (c < 0x10000) { bits = 12; hi = 0xE0; }
2471*7c568831SAndroid Build Coastguard Worker else { bits = 18; hi = 0xF0; }
2472*7c568831SAndroid Build Coastguard Worker
2473*7c568831SAndroid Build Coastguard Worker out[i++] = (c >> bits) | hi;
2474*7c568831SAndroid Build Coastguard Worker
2475*7c568831SAndroid Build Coastguard Worker while (bits > 0) {
2476*7c568831SAndroid Build Coastguard Worker bits -= 6;
2477*7c568831SAndroid Build Coastguard Worker out[i++] = ((c >> bits) & 0x3F) | 0x80;
2478*7c568831SAndroid Build Coastguard Worker }
2479*7c568831SAndroid Build Coastguard Worker
2480*7c568831SAndroid Build Coastguard Worker *osize = i;
2481*7c568831SAndroid Build Coastguard Worker return(out);
2482*7c568831SAndroid Build Coastguard Worker }
2483*7c568831SAndroid Build Coastguard Worker
2484*7c568831SAndroid Build Coastguard Worker #include "html5ent.inc"
2485*7c568831SAndroid Build Coastguard Worker
2486*7c568831SAndroid Build Coastguard Worker #define ENT_F_SEMICOLON 0x80u
2487*7c568831SAndroid Build Coastguard Worker #define ENT_F_SUBTABLE 0x40u
2488*7c568831SAndroid Build Coastguard Worker #define ENT_F_ALL 0xC0u
2489*7c568831SAndroid Build Coastguard Worker
2490*7c568831SAndroid Build Coastguard Worker static const xmlChar *
htmlFindEntityPrefix(const xmlChar * string,size_t slen,int isAttr,int * nlen,int * rlen)2491*7c568831SAndroid Build Coastguard Worker htmlFindEntityPrefix(const xmlChar *string, size_t slen, int isAttr,
2492*7c568831SAndroid Build Coastguard Worker int *nlen, int *rlen) {
2493*7c568831SAndroid Build Coastguard Worker const xmlChar *match = NULL;
2494*7c568831SAndroid Build Coastguard Worker unsigned left, right;
2495*7c568831SAndroid Build Coastguard Worker int first = string[0];
2496*7c568831SAndroid Build Coastguard Worker size_t matchLen = 0;
2497*7c568831SAndroid Build Coastguard Worker size_t soff = 1;
2498*7c568831SAndroid Build Coastguard Worker
2499*7c568831SAndroid Build Coastguard Worker if (slen < 2)
2500*7c568831SAndroid Build Coastguard Worker return(NULL);
2501*7c568831SAndroid Build Coastguard Worker if (!IS_ASCII_LETTER(first))
2502*7c568831SAndroid Build Coastguard Worker return(NULL);
2503*7c568831SAndroid Build Coastguard Worker
2504*7c568831SAndroid Build Coastguard Worker /*
2505*7c568831SAndroid Build Coastguard Worker * Look up range by first character
2506*7c568831SAndroid Build Coastguard Worker */
2507*7c568831SAndroid Build Coastguard Worker first &= 63;
2508*7c568831SAndroid Build Coastguard Worker left = htmlEntAlpha[first*3] | htmlEntAlpha[first*3+1] << 8;
2509*7c568831SAndroid Build Coastguard Worker right = left + htmlEntAlpha[first*3+2];
2510*7c568831SAndroid Build Coastguard Worker
2511*7c568831SAndroid Build Coastguard Worker /*
2512*7c568831SAndroid Build Coastguard Worker * Binary search
2513*7c568831SAndroid Build Coastguard Worker */
2514*7c568831SAndroid Build Coastguard Worker while (left < right) {
2515*7c568831SAndroid Build Coastguard Worker const xmlChar *bytes;
2516*7c568831SAndroid Build Coastguard Worker unsigned mid;
2517*7c568831SAndroid Build Coastguard Worker size_t len;
2518*7c568831SAndroid Build Coastguard Worker int cmp;
2519*7c568831SAndroid Build Coastguard Worker
2520*7c568831SAndroid Build Coastguard Worker mid = left + (right - left) / 2;
2521*7c568831SAndroid Build Coastguard Worker bytes = htmlEntStrings + htmlEntValues[mid];
2522*7c568831SAndroid Build Coastguard Worker len = bytes[0] & ~ENT_F_ALL;
2523*7c568831SAndroid Build Coastguard Worker
2524*7c568831SAndroid Build Coastguard Worker cmp = string[soff] - bytes[1];
2525*7c568831SAndroid Build Coastguard Worker
2526*7c568831SAndroid Build Coastguard Worker if (cmp == 0) {
2527*7c568831SAndroid Build Coastguard Worker if (slen < len) {
2528*7c568831SAndroid Build Coastguard Worker cmp = strncmp((const char *) string + soff + 1,
2529*7c568831SAndroid Build Coastguard Worker (const char *) bytes + 2,
2530*7c568831SAndroid Build Coastguard Worker slen - 1);
2531*7c568831SAndroid Build Coastguard Worker /* Prefix can never match */
2532*7c568831SAndroid Build Coastguard Worker if (cmp == 0)
2533*7c568831SAndroid Build Coastguard Worker break;
2534*7c568831SAndroid Build Coastguard Worker } else {
2535*7c568831SAndroid Build Coastguard Worker cmp = strncmp((const char *) string + soff + 1,
2536*7c568831SAndroid Build Coastguard Worker (const char *) bytes + 2,
2537*7c568831SAndroid Build Coastguard Worker len - 1);
2538*7c568831SAndroid Build Coastguard Worker }
2539*7c568831SAndroid Build Coastguard Worker }
2540*7c568831SAndroid Build Coastguard Worker
2541*7c568831SAndroid Build Coastguard Worker if (cmp < 0) {
2542*7c568831SAndroid Build Coastguard Worker right = mid;
2543*7c568831SAndroid Build Coastguard Worker } else if (cmp > 0) {
2544*7c568831SAndroid Build Coastguard Worker left = mid + 1;
2545*7c568831SAndroid Build Coastguard Worker } else {
2546*7c568831SAndroid Build Coastguard Worker int term = soff + len < slen ? string[soff + len] : 0;
2547*7c568831SAndroid Build Coastguard Worker int isAlnum, isTerm;
2548*7c568831SAndroid Build Coastguard Worker
2549*7c568831SAndroid Build Coastguard Worker isAlnum = IS_ALNUM(term);
2550*7c568831SAndroid Build Coastguard Worker isTerm = ((term == ';') ||
2551*7c568831SAndroid Build Coastguard Worker ((bytes[0] & ENT_F_SEMICOLON) &&
2552*7c568831SAndroid Build Coastguard Worker ((!isAttr) ||
2553*7c568831SAndroid Build Coastguard Worker ((!isAlnum) && (term != '=')))));
2554*7c568831SAndroid Build Coastguard Worker
2555*7c568831SAndroid Build Coastguard Worker if (isTerm) {
2556*7c568831SAndroid Build Coastguard Worker match = bytes + len + 1;
2557*7c568831SAndroid Build Coastguard Worker matchLen = soff + len;
2558*7c568831SAndroid Build Coastguard Worker if (term == ';')
2559*7c568831SAndroid Build Coastguard Worker matchLen += 1;
2560*7c568831SAndroid Build Coastguard Worker }
2561*7c568831SAndroid Build Coastguard Worker
2562*7c568831SAndroid Build Coastguard Worker if (bytes[0] & ENT_F_SUBTABLE) {
2563*7c568831SAndroid Build Coastguard Worker if (isTerm)
2564*7c568831SAndroid Build Coastguard Worker match += 2;
2565*7c568831SAndroid Build Coastguard Worker
2566*7c568831SAndroid Build Coastguard Worker if ((isAlnum) && (soff + len < slen)) {
2567*7c568831SAndroid Build Coastguard Worker left = mid + bytes[len + 1];
2568*7c568831SAndroid Build Coastguard Worker right = left + bytes[len + 2];
2569*7c568831SAndroid Build Coastguard Worker soff += len;
2570*7c568831SAndroid Build Coastguard Worker continue;
2571*7c568831SAndroid Build Coastguard Worker }
2572*7c568831SAndroid Build Coastguard Worker }
2573*7c568831SAndroid Build Coastguard Worker
2574*7c568831SAndroid Build Coastguard Worker break;
2575*7c568831SAndroid Build Coastguard Worker }
2576*7c568831SAndroid Build Coastguard Worker }
2577*7c568831SAndroid Build Coastguard Worker
2578*7c568831SAndroid Build Coastguard Worker if (match == NULL)
2579*7c568831SAndroid Build Coastguard Worker return(NULL);
2580*7c568831SAndroid Build Coastguard Worker
2581*7c568831SAndroid Build Coastguard Worker *nlen = matchLen;
2582*7c568831SAndroid Build Coastguard Worker *rlen = match[0];
2583*7c568831SAndroid Build Coastguard Worker return(match + 1);
2584*7c568831SAndroid Build Coastguard Worker }
2585*7c568831SAndroid Build Coastguard Worker
2586*7c568831SAndroid Build Coastguard Worker /**
2587*7c568831SAndroid Build Coastguard Worker * htmlParseData:
2588*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
2589*7c568831SAndroid Build Coastguard Worker * @mask: mask of terminating characters
2590*7c568831SAndroid Build Coastguard Worker * @comment: true if parsing a comment
2591*7c568831SAndroid Build Coastguard Worker * @refs: true if references are allowed
2592*7c568831SAndroid Build Coastguard Worker * @maxLength: maximum output length
2593*7c568831SAndroid Build Coastguard Worker *
2594*7c568831SAndroid Build Coastguard Worker * Parse data until terminator is reached.
2595*7c568831SAndroid Build Coastguard Worker *
2596*7c568831SAndroid Build Coastguard Worker * Returns the parsed string or NULL in case of errors.
2597*7c568831SAndroid Build Coastguard Worker */
2598*7c568831SAndroid Build Coastguard Worker
2599*7c568831SAndroid Build Coastguard Worker static xmlChar *
htmlParseData(htmlParserCtxtPtr ctxt,htmlAsciiMask mask,int comment,int refs,int maxLength)2600*7c568831SAndroid Build Coastguard Worker htmlParseData(htmlParserCtxtPtr ctxt, htmlAsciiMask mask,
2601*7c568831SAndroid Build Coastguard Worker int comment, int refs, int maxLength) {
2602*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input = ctxt->input;
2603*7c568831SAndroid Build Coastguard Worker xmlChar *ret = NULL;
2604*7c568831SAndroid Build Coastguard Worker xmlChar *buffer;
2605*7c568831SAndroid Build Coastguard Worker xmlChar utf8Char[4];
2606*7c568831SAndroid Build Coastguard Worker size_t buffer_size;
2607*7c568831SAndroid Build Coastguard Worker size_t used;
2608*7c568831SAndroid Build Coastguard Worker int eof = PARSER_PROGRESSIVE(ctxt);
2609*7c568831SAndroid Build Coastguard Worker int line, col;
2610*7c568831SAndroid Build Coastguard Worker int termSkip = -1;
2611*7c568831SAndroid Build Coastguard Worker
2612*7c568831SAndroid Build Coastguard Worker used = 0;
2613*7c568831SAndroid Build Coastguard Worker buffer_size = ctxt->spaceMax;
2614*7c568831SAndroid Build Coastguard Worker buffer = (xmlChar *) ctxt->spaceTab;
2615*7c568831SAndroid Build Coastguard Worker if (buffer == NULL) {
2616*7c568831SAndroid Build Coastguard Worker buffer_size = 500;
2617*7c568831SAndroid Build Coastguard Worker buffer = xmlMalloc(buffer_size + 1);
2618*7c568831SAndroid Build Coastguard Worker if (buffer == NULL) {
2619*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
2620*7c568831SAndroid Build Coastguard Worker return(NULL);
2621*7c568831SAndroid Build Coastguard Worker }
2622*7c568831SAndroid Build Coastguard Worker }
2623*7c568831SAndroid Build Coastguard Worker
2624*7c568831SAndroid Build Coastguard Worker line = input->line;
2625*7c568831SAndroid Build Coastguard Worker col = input->col;
2626*7c568831SAndroid Build Coastguard Worker
2627*7c568831SAndroid Build Coastguard Worker while (!PARSER_STOPPED(ctxt)) {
2628*7c568831SAndroid Build Coastguard Worker const xmlChar *chunk, *in, *repl;
2629*7c568831SAndroid Build Coastguard Worker size_t avail, chunkSize, extraSize;
2630*7c568831SAndroid Build Coastguard Worker int replSize;
2631*7c568831SAndroid Build Coastguard Worker int skip = 0;
2632*7c568831SAndroid Build Coastguard Worker int ncr = 0;
2633*7c568831SAndroid Build Coastguard Worker int ncrSize = 0;
2634*7c568831SAndroid Build Coastguard Worker int cp = 0;
2635*7c568831SAndroid Build Coastguard Worker
2636*7c568831SAndroid Build Coastguard Worker chunk = input->cur;
2637*7c568831SAndroid Build Coastguard Worker avail = input->end - chunk;
2638*7c568831SAndroid Build Coastguard Worker in = chunk;
2639*7c568831SAndroid Build Coastguard Worker
2640*7c568831SAndroid Build Coastguard Worker repl = BAD_CAST "";
2641*7c568831SAndroid Build Coastguard Worker replSize = 0;
2642*7c568831SAndroid Build Coastguard Worker
2643*7c568831SAndroid Build Coastguard Worker while (!PARSER_STOPPED(ctxt)) {
2644*7c568831SAndroid Build Coastguard Worker size_t j;
2645*7c568831SAndroid Build Coastguard Worker int cur, size;
2646*7c568831SAndroid Build Coastguard Worker
2647*7c568831SAndroid Build Coastguard Worker if ((!eof) && (avail <= 64)) {
2648*7c568831SAndroid Build Coastguard Worker size_t oldAvail = avail;
2649*7c568831SAndroid Build Coastguard Worker size_t off = in - chunk;
2650*7c568831SAndroid Build Coastguard Worker
2651*7c568831SAndroid Build Coastguard Worker input->cur = in;
2652*7c568831SAndroid Build Coastguard Worker
2653*7c568831SAndroid Build Coastguard Worker xmlParserGrow(ctxt);
2654*7c568831SAndroid Build Coastguard Worker
2655*7c568831SAndroid Build Coastguard Worker in = input->cur;
2656*7c568831SAndroid Build Coastguard Worker chunk = in - off;
2657*7c568831SAndroid Build Coastguard Worker input->cur = chunk;
2658*7c568831SAndroid Build Coastguard Worker avail = input->end - in;
2659*7c568831SAndroid Build Coastguard Worker
2660*7c568831SAndroid Build Coastguard Worker if (oldAvail == avail)
2661*7c568831SAndroid Build Coastguard Worker eof = 1;
2662*7c568831SAndroid Build Coastguard Worker }
2663*7c568831SAndroid Build Coastguard Worker
2664*7c568831SAndroid Build Coastguard Worker if (avail == 0) {
2665*7c568831SAndroid Build Coastguard Worker termSkip = 0;
2666*7c568831SAndroid Build Coastguard Worker break;
2667*7c568831SAndroid Build Coastguard Worker }
2668*7c568831SAndroid Build Coastguard Worker
2669*7c568831SAndroid Build Coastguard Worker cur = *in;
2670*7c568831SAndroid Build Coastguard Worker size = 1;
2671*7c568831SAndroid Build Coastguard Worker col += 1;
2672*7c568831SAndroid Build Coastguard Worker
2673*7c568831SAndroid Build Coastguard Worker if (htmlMaskMatch(mask, cur)) {
2674*7c568831SAndroid Build Coastguard Worker if (comment) {
2675*7c568831SAndroid Build Coastguard Worker if (avail < 2) {
2676*7c568831SAndroid Build Coastguard Worker termSkip = 1;
2677*7c568831SAndroid Build Coastguard Worker } else if (in[1] == '-') {
2678*7c568831SAndroid Build Coastguard Worker if (avail < 3) {
2679*7c568831SAndroid Build Coastguard Worker termSkip = 2;
2680*7c568831SAndroid Build Coastguard Worker } else if (in[2] == '>') {
2681*7c568831SAndroid Build Coastguard Worker termSkip = 3;
2682*7c568831SAndroid Build Coastguard Worker } else if (in[2] == '!') {
2683*7c568831SAndroid Build Coastguard Worker if (avail < 4)
2684*7c568831SAndroid Build Coastguard Worker termSkip = 3;
2685*7c568831SAndroid Build Coastguard Worker else if (in[3] == '>')
2686*7c568831SAndroid Build Coastguard Worker termSkip = 4;
2687*7c568831SAndroid Build Coastguard Worker }
2688*7c568831SAndroid Build Coastguard Worker }
2689*7c568831SAndroid Build Coastguard Worker
2690*7c568831SAndroid Build Coastguard Worker if (termSkip >= 0)
2691*7c568831SAndroid Build Coastguard Worker break;
2692*7c568831SAndroid Build Coastguard Worker } else {
2693*7c568831SAndroid Build Coastguard Worker termSkip = 0;
2694*7c568831SAndroid Build Coastguard Worker break;
2695*7c568831SAndroid Build Coastguard Worker }
2696*7c568831SAndroid Build Coastguard Worker }
2697*7c568831SAndroid Build Coastguard Worker
2698*7c568831SAndroid Build Coastguard Worker if (ncr) {
2699*7c568831SAndroid Build Coastguard Worker int lc = cur | 0x20;
2700*7c568831SAndroid Build Coastguard Worker int digit;
2701*7c568831SAndroid Build Coastguard Worker
2702*7c568831SAndroid Build Coastguard Worker if ((cur >= '0') && (cur <= '9')) {
2703*7c568831SAndroid Build Coastguard Worker digit = cur - '0';
2704*7c568831SAndroid Build Coastguard Worker } else if ((ncr == 16) && (lc >= 'a') && (lc <= 'f')) {
2705*7c568831SAndroid Build Coastguard Worker digit = (lc - 'a') + 10;
2706*7c568831SAndroid Build Coastguard Worker } else {
2707*7c568831SAndroid Build Coastguard Worker if (cur == ';') {
2708*7c568831SAndroid Build Coastguard Worker in += 1;
2709*7c568831SAndroid Build Coastguard Worker size += 1;
2710*7c568831SAndroid Build Coastguard Worker ncrSize += 1;
2711*7c568831SAndroid Build Coastguard Worker }
2712*7c568831SAndroid Build Coastguard Worker goto next_chunk;
2713*7c568831SAndroid Build Coastguard Worker }
2714*7c568831SAndroid Build Coastguard Worker
2715*7c568831SAndroid Build Coastguard Worker cp = cp * ncr + digit;
2716*7c568831SAndroid Build Coastguard Worker if (cp >= 0x110000)
2717*7c568831SAndroid Build Coastguard Worker cp = 0x110000;
2718*7c568831SAndroid Build Coastguard Worker
2719*7c568831SAndroid Build Coastguard Worker ncrSize += 1;
2720*7c568831SAndroid Build Coastguard Worker
2721*7c568831SAndroid Build Coastguard Worker goto next_char;
2722*7c568831SAndroid Build Coastguard Worker }
2723*7c568831SAndroid Build Coastguard Worker
2724*7c568831SAndroid Build Coastguard Worker switch (cur) {
2725*7c568831SAndroid Build Coastguard Worker case '&':
2726*7c568831SAndroid Build Coastguard Worker if (!refs)
2727*7c568831SAndroid Build Coastguard Worker break;
2728*7c568831SAndroid Build Coastguard Worker
2729*7c568831SAndroid Build Coastguard Worker j = 1;
2730*7c568831SAndroid Build Coastguard Worker
2731*7c568831SAndroid Build Coastguard Worker if ((j < avail) && (in[j] == '#')) {
2732*7c568831SAndroid Build Coastguard Worker j += 1;
2733*7c568831SAndroid Build Coastguard Worker if (j < avail) {
2734*7c568831SAndroid Build Coastguard Worker if ((in[j] | 0x20) == 'x') {
2735*7c568831SAndroid Build Coastguard Worker j += 1;
2736*7c568831SAndroid Build Coastguard Worker if ((j < avail) && (IS_HEX_DIGIT(in[j]))) {
2737*7c568831SAndroid Build Coastguard Worker ncr = 16;
2738*7c568831SAndroid Build Coastguard Worker size = 3;
2739*7c568831SAndroid Build Coastguard Worker ncrSize = 3;
2740*7c568831SAndroid Build Coastguard Worker cp = 0;
2741*7c568831SAndroid Build Coastguard Worker }
2742*7c568831SAndroid Build Coastguard Worker } else if (IS_ASCII_DIGIT(in[j])) {
2743*7c568831SAndroid Build Coastguard Worker ncr = 10;
2744*7c568831SAndroid Build Coastguard Worker size = 2;
2745*7c568831SAndroid Build Coastguard Worker ncrSize = 2;
2746*7c568831SAndroid Build Coastguard Worker cp = 0;
2747*7c568831SAndroid Build Coastguard Worker }
2748*7c568831SAndroid Build Coastguard Worker }
2749*7c568831SAndroid Build Coastguard Worker } else {
2750*7c568831SAndroid Build Coastguard Worker repl = htmlFindEntityPrefix(in + j,
2751*7c568831SAndroid Build Coastguard Worker avail - j,
2752*7c568831SAndroid Build Coastguard Worker /* isAttr */ 1,
2753*7c568831SAndroid Build Coastguard Worker &skip, &replSize);
2754*7c568831SAndroid Build Coastguard Worker if (repl != NULL) {
2755*7c568831SAndroid Build Coastguard Worker skip += 1;
2756*7c568831SAndroid Build Coastguard Worker goto next_chunk;
2757*7c568831SAndroid Build Coastguard Worker }
2758*7c568831SAndroid Build Coastguard Worker
2759*7c568831SAndroid Build Coastguard Worker skip = 0;
2760*7c568831SAndroid Build Coastguard Worker }
2761*7c568831SAndroid Build Coastguard Worker
2762*7c568831SAndroid Build Coastguard Worker break;
2763*7c568831SAndroid Build Coastguard Worker
2764*7c568831SAndroid Build Coastguard Worker case '\0':
2765*7c568831SAndroid Build Coastguard Worker skip = 1;
2766*7c568831SAndroid Build Coastguard Worker repl = BAD_CAST "\xEF\xBF\xBD";
2767*7c568831SAndroid Build Coastguard Worker replSize = 3;
2768*7c568831SAndroid Build Coastguard Worker goto next_chunk;
2769*7c568831SAndroid Build Coastguard Worker
2770*7c568831SAndroid Build Coastguard Worker case '\n':
2771*7c568831SAndroid Build Coastguard Worker line += 1;
2772*7c568831SAndroid Build Coastguard Worker col = 1;
2773*7c568831SAndroid Build Coastguard Worker break;
2774*7c568831SAndroid Build Coastguard Worker
2775*7c568831SAndroid Build Coastguard Worker case '\r':
2776*7c568831SAndroid Build Coastguard Worker skip = 1;
2777*7c568831SAndroid Build Coastguard Worker if (in[1] != 0x0A) {
2778*7c568831SAndroid Build Coastguard Worker repl = BAD_CAST "\x0A";
2779*7c568831SAndroid Build Coastguard Worker replSize = 1;
2780*7c568831SAndroid Build Coastguard Worker }
2781*7c568831SAndroid Build Coastguard Worker goto next_chunk;
2782*7c568831SAndroid Build Coastguard Worker
2783*7c568831SAndroid Build Coastguard Worker default:
2784*7c568831SAndroid Build Coastguard Worker if (cur < 0x80)
2785*7c568831SAndroid Build Coastguard Worker break;
2786*7c568831SAndroid Build Coastguard Worker
2787*7c568831SAndroid Build Coastguard Worker if ((input->flags & XML_INPUT_HAS_ENCODING) == 0) {
2788*7c568831SAndroid Build Coastguard Worker xmlChar * guess;
2789*7c568831SAndroid Build Coastguard Worker
2790*7c568831SAndroid Build Coastguard Worker guess = htmlFindEncoding(ctxt);
2791*7c568831SAndroid Build Coastguard Worker if (guess == NULL) {
2792*7c568831SAndroid Build Coastguard Worker xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
2793*7c568831SAndroid Build Coastguard Worker } else {
2794*7c568831SAndroid Build Coastguard Worker xmlSwitchEncodingName(ctxt, (const char *) guess);
2795*7c568831SAndroid Build Coastguard Worker xmlFree(guess);
2796*7c568831SAndroid Build Coastguard Worker }
2797*7c568831SAndroid Build Coastguard Worker input->flags |= XML_INPUT_HAS_ENCODING;
2798*7c568831SAndroid Build Coastguard Worker
2799*7c568831SAndroid Build Coastguard Worker goto restart;
2800*7c568831SAndroid Build Coastguard Worker }
2801*7c568831SAndroid Build Coastguard Worker
2802*7c568831SAndroid Build Coastguard Worker size = htmlValidateUtf8(ctxt, in, avail);
2803*7c568831SAndroid Build Coastguard Worker
2804*7c568831SAndroid Build Coastguard Worker if (size <= 0) {
2805*7c568831SAndroid Build Coastguard Worker skip = 1;
2806*7c568831SAndroid Build Coastguard Worker repl = BAD_CAST "\xEF\xBF\xBD";
2807*7c568831SAndroid Build Coastguard Worker replSize = 3;
2808*7c568831SAndroid Build Coastguard Worker goto next_chunk;
2809*7c568831SAndroid Build Coastguard Worker }
2810*7c568831SAndroid Build Coastguard Worker
2811*7c568831SAndroid Build Coastguard Worker break;
2812*7c568831SAndroid Build Coastguard Worker }
2813*7c568831SAndroid Build Coastguard Worker
2814*7c568831SAndroid Build Coastguard Worker next_char:
2815*7c568831SAndroid Build Coastguard Worker in += size;
2816*7c568831SAndroid Build Coastguard Worker avail -= size;
2817*7c568831SAndroid Build Coastguard Worker }
2818*7c568831SAndroid Build Coastguard Worker
2819*7c568831SAndroid Build Coastguard Worker next_chunk:
2820*7c568831SAndroid Build Coastguard Worker if (ncrSize > 0) {
2821*7c568831SAndroid Build Coastguard Worker skip = ncrSize;
2822*7c568831SAndroid Build Coastguard Worker in -= ncrSize;
2823*7c568831SAndroid Build Coastguard Worker
2824*7c568831SAndroid Build Coastguard Worker repl = htmlCodePointToUtf8(cp, utf8Char, &replSize);
2825*7c568831SAndroid Build Coastguard Worker }
2826*7c568831SAndroid Build Coastguard Worker
2827*7c568831SAndroid Build Coastguard Worker chunkSize = in - chunk;
2828*7c568831SAndroid Build Coastguard Worker extraSize = chunkSize + replSize;
2829*7c568831SAndroid Build Coastguard Worker
2830*7c568831SAndroid Build Coastguard Worker if (extraSize > maxLength - used) {
2831*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, XML_ERR_RESOURCE_LIMIT,
2832*7c568831SAndroid Build Coastguard Worker "value too long\n", NULL, NULL);
2833*7c568831SAndroid Build Coastguard Worker goto error;
2834*7c568831SAndroid Build Coastguard Worker }
2835*7c568831SAndroid Build Coastguard Worker
2836*7c568831SAndroid Build Coastguard Worker if (extraSize > buffer_size - used) {
2837*7c568831SAndroid Build Coastguard Worker size_t newSize = (used + extraSize) * 2;
2838*7c568831SAndroid Build Coastguard Worker xmlChar *tmp = (xmlChar *) xmlRealloc(buffer, newSize + 1);
2839*7c568831SAndroid Build Coastguard Worker
2840*7c568831SAndroid Build Coastguard Worker if (tmp == NULL) {
2841*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
2842*7c568831SAndroid Build Coastguard Worker goto error;
2843*7c568831SAndroid Build Coastguard Worker }
2844*7c568831SAndroid Build Coastguard Worker buffer = tmp;
2845*7c568831SAndroid Build Coastguard Worker buffer_size = newSize;
2846*7c568831SAndroid Build Coastguard Worker }
2847*7c568831SAndroid Build Coastguard Worker
2848*7c568831SAndroid Build Coastguard Worker if (chunkSize > 0) {
2849*7c568831SAndroid Build Coastguard Worker input->cur += chunkSize;
2850*7c568831SAndroid Build Coastguard Worker memcpy(buffer + used, chunk, chunkSize);
2851*7c568831SAndroid Build Coastguard Worker used += chunkSize;
2852*7c568831SAndroid Build Coastguard Worker }
2853*7c568831SAndroid Build Coastguard Worker
2854*7c568831SAndroid Build Coastguard Worker input->cur += skip;
2855*7c568831SAndroid Build Coastguard Worker if (replSize > 0) {
2856*7c568831SAndroid Build Coastguard Worker memcpy(buffer + used, repl, replSize);
2857*7c568831SAndroid Build Coastguard Worker used += replSize;
2858*7c568831SAndroid Build Coastguard Worker }
2859*7c568831SAndroid Build Coastguard Worker
2860*7c568831SAndroid Build Coastguard Worker SHRINK;
2861*7c568831SAndroid Build Coastguard Worker
2862*7c568831SAndroid Build Coastguard Worker if (termSkip >= 0)
2863*7c568831SAndroid Build Coastguard Worker break;
2864*7c568831SAndroid Build Coastguard Worker
2865*7c568831SAndroid Build Coastguard Worker restart:
2866*7c568831SAndroid Build Coastguard Worker ;
2867*7c568831SAndroid Build Coastguard Worker }
2868*7c568831SAndroid Build Coastguard Worker
2869*7c568831SAndroid Build Coastguard Worker if (termSkip > 0) {
2870*7c568831SAndroid Build Coastguard Worker input->cur += termSkip;
2871*7c568831SAndroid Build Coastguard Worker col += termSkip;
2872*7c568831SAndroid Build Coastguard Worker }
2873*7c568831SAndroid Build Coastguard Worker
2874*7c568831SAndroid Build Coastguard Worker input->line = line;
2875*7c568831SAndroid Build Coastguard Worker input->col = col;
2876*7c568831SAndroid Build Coastguard Worker
2877*7c568831SAndroid Build Coastguard Worker ret = xmlMalloc(used + 1);
2878*7c568831SAndroid Build Coastguard Worker if (ret == NULL) {
2879*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
2880*7c568831SAndroid Build Coastguard Worker } else {
2881*7c568831SAndroid Build Coastguard Worker memcpy(ret, buffer, used);
2882*7c568831SAndroid Build Coastguard Worker ret[used] = 0;
2883*7c568831SAndroid Build Coastguard Worker }
2884*7c568831SAndroid Build Coastguard Worker
2885*7c568831SAndroid Build Coastguard Worker error:
2886*7c568831SAndroid Build Coastguard Worker ctxt->spaceTab = (void *) buffer;
2887*7c568831SAndroid Build Coastguard Worker ctxt->spaceMax = buffer_size;
2888*7c568831SAndroid Build Coastguard Worker
2889*7c568831SAndroid Build Coastguard Worker return(ret);
2890*7c568831SAndroid Build Coastguard Worker }
2891*7c568831SAndroid Build Coastguard Worker
2892*7c568831SAndroid Build Coastguard Worker /**
2893*7c568831SAndroid Build Coastguard Worker * htmlParseEntityRef:
2894*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
2895*7c568831SAndroid Build Coastguard Worker * @str: location to store the entity name
2896*7c568831SAndroid Build Coastguard Worker *
2897*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Internal function, don't use.
2898*7c568831SAndroid Build Coastguard Worker *
2899*7c568831SAndroid Build Coastguard Worker * Returns NULL.
2900*7c568831SAndroid Build Coastguard Worker */
2901*7c568831SAndroid Build Coastguard Worker const htmlEntityDesc *
htmlParseEntityRef(htmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,const xmlChar ** str ATTRIBUTE_UNUSED)2902*7c568831SAndroid Build Coastguard Worker htmlParseEntityRef(htmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
2903*7c568831SAndroid Build Coastguard Worker const xmlChar **str ATTRIBUTE_UNUSED) {
2904*7c568831SAndroid Build Coastguard Worker return(NULL);
2905*7c568831SAndroid Build Coastguard Worker }
2906*7c568831SAndroid Build Coastguard Worker
2907*7c568831SAndroid Build Coastguard Worker /**
2908*7c568831SAndroid Build Coastguard Worker * htmlParseAttValue:
2909*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
2910*7c568831SAndroid Build Coastguard Worker *
2911*7c568831SAndroid Build Coastguard Worker * parse a value for an attribute
2912*7c568831SAndroid Build Coastguard Worker * Note: the parser won't do substitution of entities here, this
2913*7c568831SAndroid Build Coastguard Worker * will be handled later in xmlStringGetNodeList, unless it was
2914*7c568831SAndroid Build Coastguard Worker * asked for ctxt->replaceEntities != 0
2915*7c568831SAndroid Build Coastguard Worker *
2916*7c568831SAndroid Build Coastguard Worker * Returns the AttValue parsed or NULL.
2917*7c568831SAndroid Build Coastguard Worker */
2918*7c568831SAndroid Build Coastguard Worker
2919*7c568831SAndroid Build Coastguard Worker static xmlChar *
htmlParseAttValue(htmlParserCtxtPtr ctxt)2920*7c568831SAndroid Build Coastguard Worker htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2921*7c568831SAndroid Build Coastguard Worker xmlChar *ret = NULL;
2922*7c568831SAndroid Build Coastguard Worker int maxLength = (ctxt->options & HTML_PARSE_HUGE) ?
2923*7c568831SAndroid Build Coastguard Worker XML_MAX_HUGE_LENGTH :
2924*7c568831SAndroid Build Coastguard Worker XML_MAX_TEXT_LENGTH;
2925*7c568831SAndroid Build Coastguard Worker
2926*7c568831SAndroid Build Coastguard Worker if (CUR == '"') {
2927*7c568831SAndroid Build Coastguard Worker SKIP(1);
2928*7c568831SAndroid Build Coastguard Worker ret = htmlParseData(ctxt, MASK_DQ, 0, 1, maxLength);
2929*7c568831SAndroid Build Coastguard Worker if (CUR == '"')
2930*7c568831SAndroid Build Coastguard Worker SKIP(1);
2931*7c568831SAndroid Build Coastguard Worker } else if (CUR == '\'') {
2932*7c568831SAndroid Build Coastguard Worker SKIP(1);
2933*7c568831SAndroid Build Coastguard Worker ret = htmlParseData(ctxt, MASK_SQ, 0, 1, maxLength);
2934*7c568831SAndroid Build Coastguard Worker if (CUR == '\'')
2935*7c568831SAndroid Build Coastguard Worker SKIP(1);
2936*7c568831SAndroid Build Coastguard Worker } else {
2937*7c568831SAndroid Build Coastguard Worker ret = htmlParseData(ctxt, MASK_WS_GT, 0, 1, maxLength);
2938*7c568831SAndroid Build Coastguard Worker }
2939*7c568831SAndroid Build Coastguard Worker return(ret);
2940*7c568831SAndroid Build Coastguard Worker }
2941*7c568831SAndroid Build Coastguard Worker
2942*7c568831SAndroid Build Coastguard Worker static void
htmlCharDataSAXCallback(htmlParserCtxtPtr ctxt,const xmlChar * buf,int size,int mode)2943*7c568831SAndroid Build Coastguard Worker htmlCharDataSAXCallback(htmlParserCtxtPtr ctxt, const xmlChar *buf,
2944*7c568831SAndroid Build Coastguard Worker int size, int mode) {
2945*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax == NULL) || (ctxt->disableSAX))
2946*7c568831SAndroid Build Coastguard Worker return;
2947*7c568831SAndroid Build Coastguard Worker
2948*7c568831SAndroid Build Coastguard Worker if ((mode == 0) || (mode == DATA_RCDATA) ||
2949*7c568831SAndroid Build Coastguard Worker (ctxt->sax->cdataBlock == NULL)) {
2950*7c568831SAndroid Build Coastguard Worker int blank = areBlanks(ctxt, buf, size);
2951*7c568831SAndroid Build Coastguard Worker
2952*7c568831SAndroid Build Coastguard Worker if ((mode == 0) && (blank > 0) && (!ctxt->keepBlanks)) {
2953*7c568831SAndroid Build Coastguard Worker if (ctxt->sax->ignorableWhitespace != NULL)
2954*7c568831SAndroid Build Coastguard Worker ctxt->sax->ignorableWhitespace(ctxt->userData,
2955*7c568831SAndroid Build Coastguard Worker buf, size);
2956*7c568831SAndroid Build Coastguard Worker } else {
2957*7c568831SAndroid Build Coastguard Worker if ((mode == 0) && (blank < 0))
2958*7c568831SAndroid Build Coastguard Worker htmlCheckParagraph(ctxt);
2959*7c568831SAndroid Build Coastguard Worker
2960*7c568831SAndroid Build Coastguard Worker if (ctxt->sax->characters != NULL)
2961*7c568831SAndroid Build Coastguard Worker ctxt->sax->characters(ctxt->userData, buf, size);
2962*7c568831SAndroid Build Coastguard Worker }
2963*7c568831SAndroid Build Coastguard Worker } else {
2964*7c568831SAndroid Build Coastguard Worker /*
2965*7c568831SAndroid Build Coastguard Worker * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
2966*7c568831SAndroid Build Coastguard Worker */
2967*7c568831SAndroid Build Coastguard Worker ctxt->sax->cdataBlock(ctxt->userData, buf, size);
2968*7c568831SAndroid Build Coastguard Worker }
2969*7c568831SAndroid Build Coastguard Worker }
2970*7c568831SAndroid Build Coastguard Worker
2971*7c568831SAndroid Build Coastguard Worker /**
2972*7c568831SAndroid Build Coastguard Worker * htmlParseCharData:
2973*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
2975*7c568831SAndroid Build Coastguard Worker *
2976*7c568831SAndroid Build Coastguard Worker * Parse character data and references.
2977*7c568831SAndroid Build Coastguard Worker */
2978*7c568831SAndroid Build Coastguard Worker
2979*7c568831SAndroid Build Coastguard Worker static int
htmlParseCharData(htmlParserCtxtPtr ctxt)2980*7c568831SAndroid Build Coastguard Worker htmlParseCharData(htmlParserCtxtPtr ctxt) {
2981*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input = ctxt->input;
2982*7c568831SAndroid Build Coastguard Worker xmlChar utf8Char[4];
2983*7c568831SAndroid Build Coastguard Worker int complete = 0;
2984*7c568831SAndroid Build Coastguard Worker int done = 0;
2985*7c568831SAndroid Build Coastguard Worker int mode;
2986*7c568831SAndroid Build Coastguard Worker int eof = PARSER_PROGRESSIVE(ctxt);
2987*7c568831SAndroid Build Coastguard Worker int line, col;
2988*7c568831SAndroid Build Coastguard Worker
2989*7c568831SAndroid Build Coastguard Worker mode = ctxt->endCheckState;
2990*7c568831SAndroid Build Coastguard Worker
2991*7c568831SAndroid Build Coastguard Worker line = input->line;
2992*7c568831SAndroid Build Coastguard Worker col = input->col;
2993*7c568831SAndroid Build Coastguard Worker
2994*7c568831SAndroid Build Coastguard Worker while (!PARSER_STOPPED(ctxt)) {
2995*7c568831SAndroid Build Coastguard Worker const xmlChar *chunk, *in, *repl;
2996*7c568831SAndroid Build Coastguard Worker size_t avail;
2997*7c568831SAndroid Build Coastguard Worker int replSize;
2998*7c568831SAndroid Build Coastguard Worker int skip = 0;
2999*7c568831SAndroid Build Coastguard Worker int ncr = 0;
3000*7c568831SAndroid Build Coastguard Worker int ncrSize = 0;
3001*7c568831SAndroid Build Coastguard Worker int cp = 0;
3002*7c568831SAndroid Build Coastguard Worker
3003*7c568831SAndroid Build Coastguard Worker chunk = input->cur;
3004*7c568831SAndroid Build Coastguard Worker avail = input->end - chunk;
3005*7c568831SAndroid Build Coastguard Worker in = chunk;
3006*7c568831SAndroid Build Coastguard Worker
3007*7c568831SAndroid Build Coastguard Worker repl = BAD_CAST "";
3008*7c568831SAndroid Build Coastguard Worker replSize = 0;
3009*7c568831SAndroid Build Coastguard Worker
3010*7c568831SAndroid Build Coastguard Worker while (!PARSER_STOPPED(ctxt)) {
3011*7c568831SAndroid Build Coastguard Worker size_t j;
3012*7c568831SAndroid Build Coastguard Worker int cur, size;
3013*7c568831SAndroid Build Coastguard Worker
3014*7c568831SAndroid Build Coastguard Worker if (avail <= 64) {
3015*7c568831SAndroid Build Coastguard Worker if (!eof) {
3016*7c568831SAndroid Build Coastguard Worker size_t oldAvail = avail;
3017*7c568831SAndroid Build Coastguard Worker size_t off = in - chunk;
3018*7c568831SAndroid Build Coastguard Worker
3019*7c568831SAndroid Build Coastguard Worker input->cur = in;
3020*7c568831SAndroid Build Coastguard Worker
3021*7c568831SAndroid Build Coastguard Worker xmlParserGrow(ctxt);
3022*7c568831SAndroid Build Coastguard Worker
3023*7c568831SAndroid Build Coastguard Worker in = input->cur;
3024*7c568831SAndroid Build Coastguard Worker chunk = in - off;
3025*7c568831SAndroid Build Coastguard Worker input->cur = chunk;
3026*7c568831SAndroid Build Coastguard Worker avail = input->end - in;
3027*7c568831SAndroid Build Coastguard Worker
3028*7c568831SAndroid Build Coastguard Worker if (oldAvail == avail)
3029*7c568831SAndroid Build Coastguard Worker eof = 1;
3030*7c568831SAndroid Build Coastguard Worker }
3031*7c568831SAndroid Build Coastguard Worker
3032*7c568831SAndroid Build Coastguard Worker if (avail == 0) {
3033*7c568831SAndroid Build Coastguard Worker done = 1;
3034*7c568831SAndroid Build Coastguard Worker break;
3035*7c568831SAndroid Build Coastguard Worker }
3036*7c568831SAndroid Build Coastguard Worker }
3037*7c568831SAndroid Build Coastguard Worker
3038*7c568831SAndroid Build Coastguard Worker /* Accelerator */
3039*7c568831SAndroid Build Coastguard Worker if (!ncr) {
3040*7c568831SAndroid Build Coastguard Worker while (avail > 0) {
3041*7c568831SAndroid Build Coastguard Worker static const unsigned mask[8] = {
3042*7c568831SAndroid Build Coastguard Worker 0x00002401, 0x10002040,
3043*7c568831SAndroid Build Coastguard Worker 0x00000000, 0x00000000,
3044*7c568831SAndroid Build Coastguard Worker 0xFFFFFFFF, 0xFFFFFFFF,
3045*7c568831SAndroid Build Coastguard Worker 0xFFFFFFFF, 0xFFFFFFFF
3046*7c568831SAndroid Build Coastguard Worker };
3047*7c568831SAndroid Build Coastguard Worker cur = *in;
3048*7c568831SAndroid Build Coastguard Worker if ((1u << (cur & 0x1F)) & mask[cur >> 5])
3049*7c568831SAndroid Build Coastguard Worker break;
3050*7c568831SAndroid Build Coastguard Worker col += 1;
3051*7c568831SAndroid Build Coastguard Worker in += 1;
3052*7c568831SAndroid Build Coastguard Worker avail -= 1;
3053*7c568831SAndroid Build Coastguard Worker }
3054*7c568831SAndroid Build Coastguard Worker
3055*7c568831SAndroid Build Coastguard Worker if ((!eof) && (avail <= 64))
3056*7c568831SAndroid Build Coastguard Worker continue;
3057*7c568831SAndroid Build Coastguard Worker if (avail == 0)
3058*7c568831SAndroid Build Coastguard Worker continue;
3059*7c568831SAndroid Build Coastguard Worker }
3060*7c568831SAndroid Build Coastguard Worker
3061*7c568831SAndroid Build Coastguard Worker cur = *in;
3062*7c568831SAndroid Build Coastguard Worker size = 1;
3063*7c568831SAndroid Build Coastguard Worker col += 1;
3064*7c568831SAndroid Build Coastguard Worker
3065*7c568831SAndroid Build Coastguard Worker if (ncr) {
3066*7c568831SAndroid Build Coastguard Worker int lc = cur | 0x20;
3067*7c568831SAndroid Build Coastguard Worker int digit;
3068*7c568831SAndroid Build Coastguard Worker
3069*7c568831SAndroid Build Coastguard Worker if ((cur >= '0') && (cur <= '9')) {
3070*7c568831SAndroid Build Coastguard Worker digit = cur - '0';
3071*7c568831SAndroid Build Coastguard Worker } else if ((ncr == 16) && (lc >= 'a') && (lc <= 'f')) {
3072*7c568831SAndroid Build Coastguard Worker digit = (lc - 'a') + 10;
3073*7c568831SAndroid Build Coastguard Worker } else {
3074*7c568831SAndroid Build Coastguard Worker if (cur == ';') {
3075*7c568831SAndroid Build Coastguard Worker in += 1;
3076*7c568831SAndroid Build Coastguard Worker size += 1;
3077*7c568831SAndroid Build Coastguard Worker ncrSize += 1;
3078*7c568831SAndroid Build Coastguard Worker }
3079*7c568831SAndroid Build Coastguard Worker goto next_chunk;
3080*7c568831SAndroid Build Coastguard Worker }
3081*7c568831SAndroid Build Coastguard Worker
3082*7c568831SAndroid Build Coastguard Worker cp = cp * ncr + digit;
3083*7c568831SAndroid Build Coastguard Worker if (cp >= 0x110000)
3084*7c568831SAndroid Build Coastguard Worker cp = 0x110000;
3085*7c568831SAndroid Build Coastguard Worker
3086*7c568831SAndroid Build Coastguard Worker ncrSize += 1;
3087*7c568831SAndroid Build Coastguard Worker
3088*7c568831SAndroid Build Coastguard Worker goto next_char;
3089*7c568831SAndroid Build Coastguard Worker }
3090*7c568831SAndroid Build Coastguard Worker
3091*7c568831SAndroid Build Coastguard Worker switch (cur) {
3092*7c568831SAndroid Build Coastguard Worker case '<':
3093*7c568831SAndroid Build Coastguard Worker if (mode == 0) {
3094*7c568831SAndroid Build Coastguard Worker done = 1;
3095*7c568831SAndroid Build Coastguard Worker goto next_chunk;
3096*7c568831SAndroid Build Coastguard Worker }
3097*7c568831SAndroid Build Coastguard Worker if (mode == DATA_PLAINTEXT)
3098*7c568831SAndroid Build Coastguard Worker break;
3099*7c568831SAndroid Build Coastguard Worker
3100*7c568831SAndroid Build Coastguard Worker j = 1;
3101*7c568831SAndroid Build Coastguard Worker if (j < avail) {
3102*7c568831SAndroid Build Coastguard Worker if ((mode == DATA_SCRIPT) && (in[j] == '!')) {
3103*7c568831SAndroid Build Coastguard Worker /* Check for comment start */
3104*7c568831SAndroid Build Coastguard Worker
3105*7c568831SAndroid Build Coastguard Worker j += 1;
3106*7c568831SAndroid Build Coastguard Worker if ((j < avail) && (in[j] == '-')) {
3107*7c568831SAndroid Build Coastguard Worker j += 1;
3108*7c568831SAndroid Build Coastguard Worker if ((j < avail) && (in[j] == '-'))
3109*7c568831SAndroid Build Coastguard Worker mode = DATA_SCRIPT_ESC1;
3110*7c568831SAndroid Build Coastguard Worker }
3111*7c568831SAndroid Build Coastguard Worker } else {
3112*7c568831SAndroid Build Coastguard Worker int i = 0;
3113*7c568831SAndroid Build Coastguard Worker int solidus = 0;
3114*7c568831SAndroid Build Coastguard Worker
3115*7c568831SAndroid Build Coastguard Worker /* Check for tag */
3116*7c568831SAndroid Build Coastguard Worker
3117*7c568831SAndroid Build Coastguard Worker if (in[j] == '/') {
3118*7c568831SAndroid Build Coastguard Worker j += 1;
3119*7c568831SAndroid Build Coastguard Worker solidus = 1;
3120*7c568831SAndroid Build Coastguard Worker }
3121*7c568831SAndroid Build Coastguard Worker
3122*7c568831SAndroid Build Coastguard Worker if ((solidus) || (mode == DATA_SCRIPT_ESC1)) {
3123*7c568831SAndroid Build Coastguard Worker while ((j < avail) &&
3124*7c568831SAndroid Build Coastguard Worker (ctxt->name[i] != 0) &&
3125*7c568831SAndroid Build Coastguard Worker (ctxt->name[i] == (in[j] | 0x20))) {
3126*7c568831SAndroid Build Coastguard Worker i += 1;
3127*7c568831SAndroid Build Coastguard Worker j += 1;
3128*7c568831SAndroid Build Coastguard Worker }
3129*7c568831SAndroid Build Coastguard Worker
3130*7c568831SAndroid Build Coastguard Worker if ((ctxt->name[i] == 0) && (j < avail)) {
3131*7c568831SAndroid Build Coastguard Worker int c = in[j];
3132*7c568831SAndroid Build Coastguard Worker
3133*7c568831SAndroid Build Coastguard Worker if ((c == '>') || (c == '/') ||
3134*7c568831SAndroid Build Coastguard Worker (IS_WS_HTML(c))) {
3135*7c568831SAndroid Build Coastguard Worker if ((mode == DATA_SCRIPT_ESC1) &&
3136*7c568831SAndroid Build Coastguard Worker (!solidus)) {
3137*7c568831SAndroid Build Coastguard Worker mode = DATA_SCRIPT_ESC2;
3138*7c568831SAndroid Build Coastguard Worker } else if (mode == DATA_SCRIPT_ESC2) {
3139*7c568831SAndroid Build Coastguard Worker mode = DATA_SCRIPT_ESC1;
3140*7c568831SAndroid Build Coastguard Worker } else {
3141*7c568831SAndroid Build Coastguard Worker complete = 1;
3142*7c568831SAndroid Build Coastguard Worker done = 1;
3143*7c568831SAndroid Build Coastguard Worker goto next_chunk;
3144*7c568831SAndroid Build Coastguard Worker }
3145*7c568831SAndroid Build Coastguard Worker }
3146*7c568831SAndroid Build Coastguard Worker }
3147*7c568831SAndroid Build Coastguard Worker }
3148*7c568831SAndroid Build Coastguard Worker }
3149*7c568831SAndroid Build Coastguard Worker }
3150*7c568831SAndroid Build Coastguard Worker
3151*7c568831SAndroid Build Coastguard Worker if ((mode != 0) && (PARSER_PROGRESSIVE(ctxt))) {
3152*7c568831SAndroid Build Coastguard Worker in += 1;
3153*7c568831SAndroid Build Coastguard Worker done = 1;
3154*7c568831SAndroid Build Coastguard Worker goto next_chunk;
3155*7c568831SAndroid Build Coastguard Worker }
3156*7c568831SAndroid Build Coastguard Worker
3157*7c568831SAndroid Build Coastguard Worker break;
3158*7c568831SAndroid Build Coastguard Worker
3159*7c568831SAndroid Build Coastguard Worker case '-':
3160*7c568831SAndroid Build Coastguard Worker if ((mode != DATA_SCRIPT_ESC1) && (mode != DATA_SCRIPT_ESC2))
3161*7c568831SAndroid Build Coastguard Worker break;
3162*7c568831SAndroid Build Coastguard Worker
3163*7c568831SAndroid Build Coastguard Worker /* Check for comment end */
3164*7c568831SAndroid Build Coastguard Worker
3165*7c568831SAndroid Build Coastguard Worker j = 1;
3166*7c568831SAndroid Build Coastguard Worker if ((j < avail) && (in[j] == '-')) {
3167*7c568831SAndroid Build Coastguard Worker j += 1;
3168*7c568831SAndroid Build Coastguard Worker if ((j < avail) && (in[j] == '>'))
3169*7c568831SAndroid Build Coastguard Worker mode = DATA_SCRIPT;
3170*7c568831SAndroid Build Coastguard Worker }
3171*7c568831SAndroid Build Coastguard Worker
3172*7c568831SAndroid Build Coastguard Worker break;
3173*7c568831SAndroid Build Coastguard Worker
3174*7c568831SAndroid Build Coastguard Worker case '&':
3175*7c568831SAndroid Build Coastguard Worker if ((mode != 0) && (mode != DATA_RCDATA))
3176*7c568831SAndroid Build Coastguard Worker break;
3177*7c568831SAndroid Build Coastguard Worker
3178*7c568831SAndroid Build Coastguard Worker j = 1;
3179*7c568831SAndroid Build Coastguard Worker
3180*7c568831SAndroid Build Coastguard Worker if ((j < avail) && (in[j] == '#')) {
3181*7c568831SAndroid Build Coastguard Worker j += 1;
3182*7c568831SAndroid Build Coastguard Worker if (j < avail) {
3183*7c568831SAndroid Build Coastguard Worker if ((in[j] | 0x20) == 'x') {
3184*7c568831SAndroid Build Coastguard Worker j += 1;
3185*7c568831SAndroid Build Coastguard Worker if ((j < avail) && (IS_HEX_DIGIT(in[j]))) {
3186*7c568831SAndroid Build Coastguard Worker ncr = 16;
3187*7c568831SAndroid Build Coastguard Worker size = 3;
3188*7c568831SAndroid Build Coastguard Worker ncrSize = 3;
3189*7c568831SAndroid Build Coastguard Worker cp = 0;
3190*7c568831SAndroid Build Coastguard Worker }
3191*7c568831SAndroid Build Coastguard Worker } else if (IS_ASCII_DIGIT(in[j])) {
3192*7c568831SAndroid Build Coastguard Worker ncr = 10;
3193*7c568831SAndroid Build Coastguard Worker size = 2;
3194*7c568831SAndroid Build Coastguard Worker ncrSize = 2;
3195*7c568831SAndroid Build Coastguard Worker cp = 0;
3196*7c568831SAndroid Build Coastguard Worker }
3197*7c568831SAndroid Build Coastguard Worker }
3198*7c568831SAndroid Build Coastguard Worker } else {
3199*7c568831SAndroid Build Coastguard Worker repl = htmlFindEntityPrefix(in + j,
3200*7c568831SAndroid Build Coastguard Worker avail - j,
3201*7c568831SAndroid Build Coastguard Worker /* isAttr */ 0,
3202*7c568831SAndroid Build Coastguard Worker &skip, &replSize);
3203*7c568831SAndroid Build Coastguard Worker if (repl != NULL) {
3204*7c568831SAndroid Build Coastguard Worker skip += 1;
3205*7c568831SAndroid Build Coastguard Worker goto next_chunk;
3206*7c568831SAndroid Build Coastguard Worker }
3207*7c568831SAndroid Build Coastguard Worker
3208*7c568831SAndroid Build Coastguard Worker skip = 0;
3209*7c568831SAndroid Build Coastguard Worker }
3210*7c568831SAndroid Build Coastguard Worker
3211*7c568831SAndroid Build Coastguard Worker break;
3212*7c568831SAndroid Build Coastguard Worker
3213*7c568831SAndroid Build Coastguard Worker case '\0':
3214*7c568831SAndroid Build Coastguard Worker skip = 1;
3215*7c568831SAndroid Build Coastguard Worker repl = BAD_CAST "\xEF\xBF\xBD";
3216*7c568831SAndroid Build Coastguard Worker replSize = 3;
3217*7c568831SAndroid Build Coastguard Worker goto next_chunk;
3218*7c568831SAndroid Build Coastguard Worker
3219*7c568831SAndroid Build Coastguard Worker case '\n':
3220*7c568831SAndroid Build Coastguard Worker line += 1;
3221*7c568831SAndroid Build Coastguard Worker col = 1;
3222*7c568831SAndroid Build Coastguard Worker break;
3223*7c568831SAndroid Build Coastguard Worker
3224*7c568831SAndroid Build Coastguard Worker case '\r':
3225*7c568831SAndroid Build Coastguard Worker skip = 1;
3226*7c568831SAndroid Build Coastguard Worker if (in[1] != 0x0A) {
3227*7c568831SAndroid Build Coastguard Worker repl = BAD_CAST "\x0A";
3228*7c568831SAndroid Build Coastguard Worker replSize = 1;
3229*7c568831SAndroid Build Coastguard Worker }
3230*7c568831SAndroid Build Coastguard Worker goto next_chunk;
3231*7c568831SAndroid Build Coastguard Worker
3232*7c568831SAndroid Build Coastguard Worker default:
3233*7c568831SAndroid Build Coastguard Worker if (cur < 0x80)
3234*7c568831SAndroid Build Coastguard Worker break;
3235*7c568831SAndroid Build Coastguard Worker
3236*7c568831SAndroid Build Coastguard Worker if ((input->flags & XML_INPUT_HAS_ENCODING) == 0) {
3237*7c568831SAndroid Build Coastguard Worker xmlChar * guess;
3238*7c568831SAndroid Build Coastguard Worker
3239*7c568831SAndroid Build Coastguard Worker guess = htmlFindEncoding(ctxt);
3240*7c568831SAndroid Build Coastguard Worker if (guess == NULL) {
3241*7c568831SAndroid Build Coastguard Worker xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
3242*7c568831SAndroid Build Coastguard Worker } else {
3243*7c568831SAndroid Build Coastguard Worker xmlSwitchEncodingName(ctxt, (const char *) guess);
3244*7c568831SAndroid Build Coastguard Worker xmlFree(guess);
3245*7c568831SAndroid Build Coastguard Worker }
3246*7c568831SAndroid Build Coastguard Worker input->flags |= XML_INPUT_HAS_ENCODING;
3247*7c568831SAndroid Build Coastguard Worker
3248*7c568831SAndroid Build Coastguard Worker goto restart;
3249*7c568831SAndroid Build Coastguard Worker }
3250*7c568831SAndroid Build Coastguard Worker
3251*7c568831SAndroid Build Coastguard Worker size = htmlValidateUtf8(ctxt, in, avail);
3252*7c568831SAndroid Build Coastguard Worker
3253*7c568831SAndroid Build Coastguard Worker if (size <= 0) {
3254*7c568831SAndroid Build Coastguard Worker skip = 1;
3255*7c568831SAndroid Build Coastguard Worker repl = BAD_CAST "\xEF\xBF\xBD";
3256*7c568831SAndroid Build Coastguard Worker replSize = 3;
3257*7c568831SAndroid Build Coastguard Worker goto next_chunk;
3258*7c568831SAndroid Build Coastguard Worker }
3259*7c568831SAndroid Build Coastguard Worker
3260*7c568831SAndroid Build Coastguard Worker break;
3261*7c568831SAndroid Build Coastguard Worker }
3262*7c568831SAndroid Build Coastguard Worker
3263*7c568831SAndroid Build Coastguard Worker next_char:
3264*7c568831SAndroid Build Coastguard Worker in += size;
3265*7c568831SAndroid Build Coastguard Worker avail -= size;
3266*7c568831SAndroid Build Coastguard Worker }
3267*7c568831SAndroid Build Coastguard Worker
3268*7c568831SAndroid Build Coastguard Worker next_chunk:
3269*7c568831SAndroid Build Coastguard Worker if (ncrSize > 0) {
3270*7c568831SAndroid Build Coastguard Worker skip = ncrSize;
3271*7c568831SAndroid Build Coastguard Worker in -= ncrSize;
3272*7c568831SAndroid Build Coastguard Worker
3273*7c568831SAndroid Build Coastguard Worker repl = htmlCodePointToUtf8(cp, utf8Char, &replSize);
3274*7c568831SAndroid Build Coastguard Worker }
3275*7c568831SAndroid Build Coastguard Worker
3276*7c568831SAndroid Build Coastguard Worker if (in > chunk) {
3277*7c568831SAndroid Build Coastguard Worker input->cur += in - chunk;
3278*7c568831SAndroid Build Coastguard Worker htmlCharDataSAXCallback(ctxt, chunk, in - chunk, mode);
3279*7c568831SAndroid Build Coastguard Worker }
3280*7c568831SAndroid Build Coastguard Worker
3281*7c568831SAndroid Build Coastguard Worker input->cur += skip;
3282*7c568831SAndroid Build Coastguard Worker if (replSize > 0)
3283*7c568831SAndroid Build Coastguard Worker htmlCharDataSAXCallback(ctxt, repl, replSize, mode);
3284*7c568831SAndroid Build Coastguard Worker
3285*7c568831SAndroid Build Coastguard Worker SHRINK;
3286*7c568831SAndroid Build Coastguard Worker
3287*7c568831SAndroid Build Coastguard Worker if (done)
3288*7c568831SAndroid Build Coastguard Worker break;
3289*7c568831SAndroid Build Coastguard Worker
3290*7c568831SAndroid Build Coastguard Worker restart:
3291*7c568831SAndroid Build Coastguard Worker ;
3292*7c568831SAndroid Build Coastguard Worker }
3293*7c568831SAndroid Build Coastguard Worker
3294*7c568831SAndroid Build Coastguard Worker input->line = line;
3295*7c568831SAndroid Build Coastguard Worker input->col = col;
3296*7c568831SAndroid Build Coastguard Worker
3297*7c568831SAndroid Build Coastguard Worker if (complete)
3298*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = 0;
3299*7c568831SAndroid Build Coastguard Worker else
3300*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = mode;
3301*7c568831SAndroid Build Coastguard Worker
3302*7c568831SAndroid Build Coastguard Worker return(complete);
3303*7c568831SAndroid Build Coastguard Worker }
3304*7c568831SAndroid Build Coastguard Worker
3305*7c568831SAndroid Build Coastguard Worker /**
3306*7c568831SAndroid Build Coastguard Worker * htmlParseComment:
3307*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
3308*7c568831SAndroid Build Coastguard Worker * @bogus: true if this is a bogus comment
3309*7c568831SAndroid Build Coastguard Worker *
3310*7c568831SAndroid Build Coastguard Worker * Parse an HTML comment
3311*7c568831SAndroid Build Coastguard Worker */
3312*7c568831SAndroid Build Coastguard Worker static void
htmlParseComment(htmlParserCtxtPtr ctxt,int bogus)3313*7c568831SAndroid Build Coastguard Worker htmlParseComment(htmlParserCtxtPtr ctxt, int bogus) {
3314*7c568831SAndroid Build Coastguard Worker const xmlChar *comment = BAD_CAST "";
3315*7c568831SAndroid Build Coastguard Worker xmlChar *buf = NULL;
3316*7c568831SAndroid Build Coastguard Worker int maxLength = (ctxt->options & HTML_PARSE_HUGE) ?
3317*7c568831SAndroid Build Coastguard Worker XML_MAX_HUGE_LENGTH :
3318*7c568831SAndroid Build Coastguard Worker XML_MAX_TEXT_LENGTH;
3319*7c568831SAndroid Build Coastguard Worker
3320*7c568831SAndroid Build Coastguard Worker if (bogus) {
3321*7c568831SAndroid Build Coastguard Worker buf = htmlParseData(ctxt, MASK_GT, 0, 0, maxLength);
3322*7c568831SAndroid Build Coastguard Worker if (CUR == '>')
3323*7c568831SAndroid Build Coastguard Worker SKIP(1);
3324*7c568831SAndroid Build Coastguard Worker comment = buf;
3325*7c568831SAndroid Build Coastguard Worker } else {
3326*7c568831SAndroid Build Coastguard Worker if (CUR == '>') {
3327*7c568831SAndroid Build Coastguard Worker SKIP(1);
3328*7c568831SAndroid Build Coastguard Worker } else if ((CUR == '-') && (NXT(1) == '>')) {
3329*7c568831SAndroid Build Coastguard Worker SKIP(2);
3330*7c568831SAndroid Build Coastguard Worker } else {
3331*7c568831SAndroid Build Coastguard Worker buf = htmlParseData(ctxt, MASK_DASH, 1, 0, maxLength);
3332*7c568831SAndroid Build Coastguard Worker comment = buf;
3333*7c568831SAndroid Build Coastguard Worker }
3334*7c568831SAndroid Build Coastguard Worker }
3335*7c568831SAndroid Build Coastguard Worker
3336*7c568831SAndroid Build Coastguard Worker if (comment == NULL)
3337*7c568831SAndroid Build Coastguard Worker return;
3338*7c568831SAndroid Build Coastguard Worker
3339*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3340*7c568831SAndroid Build Coastguard Worker (!ctxt->disableSAX))
3341*7c568831SAndroid Build Coastguard Worker ctxt->sax->comment(ctxt->userData, comment);
3342*7c568831SAndroid Build Coastguard Worker
3343*7c568831SAndroid Build Coastguard Worker xmlFree(buf);
3344*7c568831SAndroid Build Coastguard Worker }
3345*7c568831SAndroid Build Coastguard Worker
3346*7c568831SAndroid Build Coastguard Worker /**
3347*7c568831SAndroid Build Coastguard Worker * htmlParseCharRef:
3348*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
3349*7c568831SAndroid Build Coastguard Worker *
3350*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Internal function, don't use.
3351*7c568831SAndroid Build Coastguard Worker *
3352*7c568831SAndroid Build Coastguard Worker * Returns 0
3353*7c568831SAndroid Build Coastguard Worker */
3354*7c568831SAndroid Build Coastguard Worker int
htmlParseCharRef(htmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED)3355*7c568831SAndroid Build Coastguard Worker htmlParseCharRef(htmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3356*7c568831SAndroid Build Coastguard Worker return(0);
3357*7c568831SAndroid Build Coastguard Worker }
3358*7c568831SAndroid Build Coastguard Worker
3359*7c568831SAndroid Build Coastguard Worker
3360*7c568831SAndroid Build Coastguard Worker /**
3361*7c568831SAndroid Build Coastguard Worker * htmlParseDoctypeLiteral:
3362*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
3363*7c568831SAndroid Build Coastguard Worker *
3364*7c568831SAndroid Build Coastguard Worker * Parse a DOCTYPE SYTSTEM or PUBLIC literal.
3365*7c568831SAndroid Build Coastguard Worker *
3366*7c568831SAndroid Build Coastguard Worker * Returns the literal or NULL in case of error.
3367*7c568831SAndroid Build Coastguard Worker */
3368*7c568831SAndroid Build Coastguard Worker
3369*7c568831SAndroid Build Coastguard Worker static xmlChar *
htmlParseDoctypeLiteral(htmlParserCtxtPtr ctxt)3370*7c568831SAndroid Build Coastguard Worker htmlParseDoctypeLiteral(htmlParserCtxtPtr ctxt) {
3371*7c568831SAndroid Build Coastguard Worker xmlChar *ret;
3372*7c568831SAndroid Build Coastguard Worker int maxLength = (ctxt->options & HTML_PARSE_HUGE) ?
3373*7c568831SAndroid Build Coastguard Worker XML_MAX_TEXT_LENGTH :
3374*7c568831SAndroid Build Coastguard Worker XML_MAX_NAME_LENGTH;
3375*7c568831SAndroid Build Coastguard Worker
3376*7c568831SAndroid Build Coastguard Worker if (CUR == '"') {
3377*7c568831SAndroid Build Coastguard Worker SKIP(1);
3378*7c568831SAndroid Build Coastguard Worker ret = htmlParseData(ctxt, MASK_DQ_GT, 0, 0, maxLength);
3379*7c568831SAndroid Build Coastguard Worker if (CUR == '"')
3380*7c568831SAndroid Build Coastguard Worker SKIP(1);
3381*7c568831SAndroid Build Coastguard Worker } else if (CUR == '\'') {
3382*7c568831SAndroid Build Coastguard Worker SKIP(1);
3383*7c568831SAndroid Build Coastguard Worker ret = htmlParseData(ctxt, MASK_SQ_GT, 0, 0, maxLength);
3384*7c568831SAndroid Build Coastguard Worker if (CUR == '\'')
3385*7c568831SAndroid Build Coastguard Worker SKIP(1);
3386*7c568831SAndroid Build Coastguard Worker } else {
3387*7c568831SAndroid Build Coastguard Worker return(NULL);
3388*7c568831SAndroid Build Coastguard Worker }
3389*7c568831SAndroid Build Coastguard Worker
3390*7c568831SAndroid Build Coastguard Worker return(ret);
3391*7c568831SAndroid Build Coastguard Worker }
3392*7c568831SAndroid Build Coastguard Worker
3393*7c568831SAndroid Build Coastguard Worker static void
htmlSkipBogusDoctype(htmlParserCtxtPtr ctxt)3394*7c568831SAndroid Build Coastguard Worker htmlSkipBogusDoctype(htmlParserCtxtPtr ctxt) {
3395*7c568831SAndroid Build Coastguard Worker const xmlChar *in;
3396*7c568831SAndroid Build Coastguard Worker size_t avail;
3397*7c568831SAndroid Build Coastguard Worker int eof = PARSER_PROGRESSIVE(ctxt);
3398*7c568831SAndroid Build Coastguard Worker int line, col;
3399*7c568831SAndroid Build Coastguard Worker
3400*7c568831SAndroid Build Coastguard Worker line = ctxt->input->line;
3401*7c568831SAndroid Build Coastguard Worker col = ctxt->input->col;
3402*7c568831SAndroid Build Coastguard Worker
3403*7c568831SAndroid Build Coastguard Worker in = ctxt->input->cur;
3404*7c568831SAndroid Build Coastguard Worker avail = ctxt->input->end - in;
3405*7c568831SAndroid Build Coastguard Worker
3406*7c568831SAndroid Build Coastguard Worker while (!PARSER_STOPPED(ctxt)) {
3407*7c568831SAndroid Build Coastguard Worker int cur;
3408*7c568831SAndroid Build Coastguard Worker
3409*7c568831SAndroid Build Coastguard Worker if ((!eof) && (avail <= 64)) {
3410*7c568831SAndroid Build Coastguard Worker size_t oldAvail = avail;
3411*7c568831SAndroid Build Coastguard Worker
3412*7c568831SAndroid Build Coastguard Worker ctxt->input->cur = in;
3413*7c568831SAndroid Build Coastguard Worker
3414*7c568831SAndroid Build Coastguard Worker xmlParserGrow(ctxt);
3415*7c568831SAndroid Build Coastguard Worker
3416*7c568831SAndroid Build Coastguard Worker in = ctxt->input->cur;
3417*7c568831SAndroid Build Coastguard Worker avail = ctxt->input->end - in;
3418*7c568831SAndroid Build Coastguard Worker
3419*7c568831SAndroid Build Coastguard Worker if (oldAvail == avail)
3420*7c568831SAndroid Build Coastguard Worker eof = 1;
3421*7c568831SAndroid Build Coastguard Worker }
3422*7c568831SAndroid Build Coastguard Worker
3423*7c568831SAndroid Build Coastguard Worker if (avail == 0)
3424*7c568831SAndroid Build Coastguard Worker break;
3425*7c568831SAndroid Build Coastguard Worker
3426*7c568831SAndroid Build Coastguard Worker col += 1;
3427*7c568831SAndroid Build Coastguard Worker
3428*7c568831SAndroid Build Coastguard Worker cur = *in;
3429*7c568831SAndroid Build Coastguard Worker if (cur == '>') {
3430*7c568831SAndroid Build Coastguard Worker in += 1;
3431*7c568831SAndroid Build Coastguard Worker break;
3432*7c568831SAndroid Build Coastguard Worker } else if (cur == 0x0A) {
3433*7c568831SAndroid Build Coastguard Worker line += 1;
3434*7c568831SAndroid Build Coastguard Worker col = 1;
3435*7c568831SAndroid Build Coastguard Worker }
3436*7c568831SAndroid Build Coastguard Worker
3437*7c568831SAndroid Build Coastguard Worker in += 1;
3438*7c568831SAndroid Build Coastguard Worker avail -= 1;
3439*7c568831SAndroid Build Coastguard Worker
3440*7c568831SAndroid Build Coastguard Worker SHRINK;
3441*7c568831SAndroid Build Coastguard Worker }
3442*7c568831SAndroid Build Coastguard Worker
3443*7c568831SAndroid Build Coastguard Worker ctxt->input->cur = in;
3444*7c568831SAndroid Build Coastguard Worker ctxt->input->line = line;
3445*7c568831SAndroid Build Coastguard Worker ctxt->input->col = col;
3446*7c568831SAndroid Build Coastguard Worker }
3447*7c568831SAndroid Build Coastguard Worker
3448*7c568831SAndroid Build Coastguard Worker /**
3449*7c568831SAndroid Build Coastguard Worker * htmlParseDocTypeDecl:
3450*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
3451*7c568831SAndroid Build Coastguard Worker *
3452*7c568831SAndroid Build Coastguard Worker * Parse a DOCTYPE declaration.
3453*7c568831SAndroid Build Coastguard Worker */
3454*7c568831SAndroid Build Coastguard Worker
3455*7c568831SAndroid Build Coastguard Worker static void
htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt)3456*7c568831SAndroid Build Coastguard Worker htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3457*7c568831SAndroid Build Coastguard Worker xmlChar *name = NULL;
3458*7c568831SAndroid Build Coastguard Worker xmlChar *publicId = NULL;
3459*7c568831SAndroid Build Coastguard Worker xmlChar *URI = NULL;
3460*7c568831SAndroid Build Coastguard Worker int maxLength = (ctxt->options & HTML_PARSE_HUGE) ?
3461*7c568831SAndroid Build Coastguard Worker XML_MAX_TEXT_LENGTH :
3462*7c568831SAndroid Build Coastguard Worker XML_MAX_NAME_LENGTH;
3463*7c568831SAndroid Build Coastguard Worker
3464*7c568831SAndroid Build Coastguard Worker /*
3465*7c568831SAndroid Build Coastguard Worker * We know that '<!DOCTYPE' has been detected.
3466*7c568831SAndroid Build Coastguard Worker */
3467*7c568831SAndroid Build Coastguard Worker SKIP(9);
3468*7c568831SAndroid Build Coastguard Worker
3469*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3470*7c568831SAndroid Build Coastguard Worker
3471*7c568831SAndroid Build Coastguard Worker if ((ctxt->input->cur < ctxt->input->end) && (CUR != '>')) {
3472*7c568831SAndroid Build Coastguard Worker name = htmlParseData(ctxt, MASK_WS_GT, 0, 0, maxLength);
3473*7c568831SAndroid Build Coastguard Worker
3474*7c568831SAndroid Build Coastguard Worker if ((ctxt->options & HTML_PARSE_HTML5) && (name != NULL)) {
3475*7c568831SAndroid Build Coastguard Worker xmlChar *cur;
3476*7c568831SAndroid Build Coastguard Worker
3477*7c568831SAndroid Build Coastguard Worker for (cur = name; *cur; cur++) {
3478*7c568831SAndroid Build Coastguard Worker if (IS_UPPER(*cur))
3479*7c568831SAndroid Build Coastguard Worker *cur += 0x20;
3480*7c568831SAndroid Build Coastguard Worker }
3481*7c568831SAndroid Build Coastguard Worker }
3482*7c568831SAndroid Build Coastguard Worker
3483*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3484*7c568831SAndroid Build Coastguard Worker }
3485*7c568831SAndroid Build Coastguard Worker
3486*7c568831SAndroid Build Coastguard Worker /*
3487*7c568831SAndroid Build Coastguard Worker * Check for SystemID and publicId
3488*7c568831SAndroid Build Coastguard Worker */
3489*7c568831SAndroid Build Coastguard Worker if ((UPPER == 'P') && (UPP(1) == 'U') &&
3490*7c568831SAndroid Build Coastguard Worker (UPP(2) == 'B') && (UPP(3) == 'L') &&
3491*7c568831SAndroid Build Coastguard Worker (UPP(4) == 'I') && (UPP(5) == 'C')) {
3492*7c568831SAndroid Build Coastguard Worker SKIP(6);
3493*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3494*7c568831SAndroid Build Coastguard Worker publicId = htmlParseDoctypeLiteral(ctxt);
3495*7c568831SAndroid Build Coastguard Worker if (publicId == NULL)
3496*7c568831SAndroid Build Coastguard Worker goto bogus;
3497*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3498*7c568831SAndroid Build Coastguard Worker URI = htmlParseDoctypeLiteral(ctxt);
3499*7c568831SAndroid Build Coastguard Worker } else if ((UPPER == 'S') && (UPP(1) == 'Y') &&
3500*7c568831SAndroid Build Coastguard Worker (UPP(2) == 'S') && (UPP(3) == 'T') &&
3501*7c568831SAndroid Build Coastguard Worker (UPP(4) == 'E') && (UPP(5) == 'M')) {
3502*7c568831SAndroid Build Coastguard Worker SKIP(6);
3503*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3504*7c568831SAndroid Build Coastguard Worker URI = htmlParseDoctypeLiteral(ctxt);
3505*7c568831SAndroid Build Coastguard Worker }
3506*7c568831SAndroid Build Coastguard Worker
3507*7c568831SAndroid Build Coastguard Worker bogus:
3508*7c568831SAndroid Build Coastguard Worker htmlSkipBogusDoctype(ctxt);
3509*7c568831SAndroid Build Coastguard Worker
3510*7c568831SAndroid Build Coastguard Worker /*
3511*7c568831SAndroid Build Coastguard Worker * Create or update the document accordingly to the DOCTYPE
3512*7c568831SAndroid Build Coastguard Worker */
3513*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3514*7c568831SAndroid Build Coastguard Worker (!ctxt->disableSAX))
3515*7c568831SAndroid Build Coastguard Worker ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
3516*7c568831SAndroid Build Coastguard Worker
3517*7c568831SAndroid Build Coastguard Worker xmlFree(name);
3518*7c568831SAndroid Build Coastguard Worker xmlFree(URI);
3519*7c568831SAndroid Build Coastguard Worker xmlFree(publicId);
3520*7c568831SAndroid Build Coastguard Worker }
3521*7c568831SAndroid Build Coastguard Worker
3522*7c568831SAndroid Build Coastguard Worker /**
3523*7c568831SAndroid Build Coastguard Worker * htmlParseAttribute:
3524*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
3525*7c568831SAndroid Build Coastguard Worker * @value: a xmlChar ** used to store the value of the attribute
3526*7c568831SAndroid Build Coastguard Worker *
3527*7c568831SAndroid Build Coastguard Worker * parse an attribute
3528*7c568831SAndroid Build Coastguard Worker *
3529*7c568831SAndroid Build Coastguard Worker * [41] Attribute ::= Name Eq AttValue
3530*7c568831SAndroid Build Coastguard Worker *
3531*7c568831SAndroid Build Coastguard Worker * [25] Eq ::= S? '=' S?
3532*7c568831SAndroid Build Coastguard Worker *
3533*7c568831SAndroid Build Coastguard Worker * With namespace:
3534*7c568831SAndroid Build Coastguard Worker *
3535*7c568831SAndroid Build Coastguard Worker * [NS 11] Attribute ::= QName Eq AttValue
3536*7c568831SAndroid Build Coastguard Worker *
3537*7c568831SAndroid Build Coastguard Worker * Also the case QName == xmlns:??? is handled independently as a namespace
3538*7c568831SAndroid Build Coastguard Worker * definition.
3539*7c568831SAndroid Build Coastguard Worker *
3540*7c568831SAndroid Build Coastguard Worker * Returns the attribute name, and the value in *value.
3541*7c568831SAndroid Build Coastguard Worker */
3542*7c568831SAndroid Build Coastguard Worker
3543*7c568831SAndroid Build Coastguard Worker static xmlHashedString
htmlParseAttribute(htmlParserCtxtPtr ctxt,xmlChar ** value)3544*7c568831SAndroid Build Coastguard Worker htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
3545*7c568831SAndroid Build Coastguard Worker xmlHashedString hname;
3546*7c568831SAndroid Build Coastguard Worker xmlChar *val = NULL;
3547*7c568831SAndroid Build Coastguard Worker
3548*7c568831SAndroid Build Coastguard Worker *value = NULL;
3549*7c568831SAndroid Build Coastguard Worker hname = htmlParseHTMLName(ctxt, 1);
3550*7c568831SAndroid Build Coastguard Worker if (hname.name == NULL)
3551*7c568831SAndroid Build Coastguard Worker return(hname);
3552*7c568831SAndroid Build Coastguard Worker
3553*7c568831SAndroid Build Coastguard Worker /*
3554*7c568831SAndroid Build Coastguard Worker * read the value
3555*7c568831SAndroid Build Coastguard Worker */
3556*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3557*7c568831SAndroid Build Coastguard Worker if (CUR == '=') {
3558*7c568831SAndroid Build Coastguard Worker SKIP(1);
3559*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3560*7c568831SAndroid Build Coastguard Worker val = htmlParseAttValue(ctxt);
3561*7c568831SAndroid Build Coastguard Worker }
3562*7c568831SAndroid Build Coastguard Worker
3563*7c568831SAndroid Build Coastguard Worker *value = val;
3564*7c568831SAndroid Build Coastguard Worker return(hname);
3565*7c568831SAndroid Build Coastguard Worker }
3566*7c568831SAndroid Build Coastguard Worker
3567*7c568831SAndroid Build Coastguard Worker /**
3568*7c568831SAndroid Build Coastguard Worker * htmlCheckEncoding:
3569*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
3570*7c568831SAndroid Build Coastguard Worker * @attvalue: the attribute value
3571*7c568831SAndroid Build Coastguard Worker *
3572*7c568831SAndroid Build Coastguard Worker * Checks an http-equiv attribute from a Meta tag to detect
3573*7c568831SAndroid Build Coastguard Worker * the encoding
3574*7c568831SAndroid Build Coastguard Worker * If a new encoding is detected the parser is switched to decode
3575*7c568831SAndroid Build Coastguard Worker * it and pass UTF8
3576*7c568831SAndroid Build Coastguard Worker */
3577*7c568831SAndroid Build Coastguard Worker static void
htmlCheckEncoding(htmlParserCtxtPtr ctxt,const xmlChar * attvalue)3578*7c568831SAndroid Build Coastguard Worker htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3579*7c568831SAndroid Build Coastguard Worker const xmlChar *encoding;
3580*7c568831SAndroid Build Coastguard Worker xmlChar *copy;
3581*7c568831SAndroid Build Coastguard Worker
3582*7c568831SAndroid Build Coastguard Worker if (!attvalue)
3583*7c568831SAndroid Build Coastguard Worker return;
3584*7c568831SAndroid Build Coastguard Worker
3585*7c568831SAndroid Build Coastguard Worker encoding = xmlStrcasestr(attvalue, BAD_CAST"charset");
3586*7c568831SAndroid Build Coastguard Worker if (encoding != NULL) {
3587*7c568831SAndroid Build Coastguard Worker encoding += 7;
3588*7c568831SAndroid Build Coastguard Worker }
3589*7c568831SAndroid Build Coastguard Worker /*
3590*7c568831SAndroid Build Coastguard Worker * skip blank
3591*7c568831SAndroid Build Coastguard Worker */
3592*7c568831SAndroid Build Coastguard Worker if (encoding && IS_WS_HTML(*encoding))
3593*7c568831SAndroid Build Coastguard Worker encoding = xmlStrcasestr(attvalue, BAD_CAST"=");
3594*7c568831SAndroid Build Coastguard Worker if (encoding && *encoding == '=') {
3595*7c568831SAndroid Build Coastguard Worker encoding ++;
3596*7c568831SAndroid Build Coastguard Worker copy = xmlStrdup(encoding);
3597*7c568831SAndroid Build Coastguard Worker if (copy == NULL)
3598*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
3599*7c568831SAndroid Build Coastguard Worker xmlSetDeclaredEncoding(ctxt, copy);
3600*7c568831SAndroid Build Coastguard Worker }
3601*7c568831SAndroid Build Coastguard Worker }
3602*7c568831SAndroid Build Coastguard Worker
3603*7c568831SAndroid Build Coastguard Worker /**
3604*7c568831SAndroid Build Coastguard Worker * htmlCheckMeta:
3605*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
3606*7c568831SAndroid Build Coastguard Worker * @atts: the attributes values
3607*7c568831SAndroid Build Coastguard Worker *
3608*7c568831SAndroid Build Coastguard Worker * Checks an attributes from a Meta tag
3609*7c568831SAndroid Build Coastguard Worker */
3610*7c568831SAndroid Build Coastguard Worker static void
htmlCheckMeta(htmlParserCtxtPtr ctxt,const xmlChar ** atts)3611*7c568831SAndroid Build Coastguard Worker htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
3612*7c568831SAndroid Build Coastguard Worker int i;
3613*7c568831SAndroid Build Coastguard Worker const xmlChar *att, *value;
3614*7c568831SAndroid Build Coastguard Worker int http = 0;
3615*7c568831SAndroid Build Coastguard Worker const xmlChar *content = NULL;
3616*7c568831SAndroid Build Coastguard Worker
3617*7c568831SAndroid Build Coastguard Worker if ((ctxt == NULL) || (atts == NULL))
3618*7c568831SAndroid Build Coastguard Worker return;
3619*7c568831SAndroid Build Coastguard Worker
3620*7c568831SAndroid Build Coastguard Worker i = 0;
3621*7c568831SAndroid Build Coastguard Worker att = atts[i++];
3622*7c568831SAndroid Build Coastguard Worker while (att != NULL) {
3623*7c568831SAndroid Build Coastguard Worker value = atts[i++];
3624*7c568831SAndroid Build Coastguard Worker if (value != NULL) {
3625*7c568831SAndroid Build Coastguard Worker if ((!xmlStrcasecmp(att, BAD_CAST "http-equiv")) &&
3626*7c568831SAndroid Build Coastguard Worker (!xmlStrcasecmp(value, BAD_CAST "Content-Type"))) {
3627*7c568831SAndroid Build Coastguard Worker http = 1;
3628*7c568831SAndroid Build Coastguard Worker } else if (!xmlStrcasecmp(att, BAD_CAST "charset")) {
3629*7c568831SAndroid Build Coastguard Worker xmlChar *copy;
3630*7c568831SAndroid Build Coastguard Worker
3631*7c568831SAndroid Build Coastguard Worker copy = xmlStrdup(value);
3632*7c568831SAndroid Build Coastguard Worker if (copy == NULL)
3633*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
3634*7c568831SAndroid Build Coastguard Worker xmlSetDeclaredEncoding(ctxt, copy);
3635*7c568831SAndroid Build Coastguard Worker } else if (!xmlStrcasecmp(att, BAD_CAST "content")) {
3636*7c568831SAndroid Build Coastguard Worker content = value;
3637*7c568831SAndroid Build Coastguard Worker }
3638*7c568831SAndroid Build Coastguard Worker }
3639*7c568831SAndroid Build Coastguard Worker att = atts[i++];
3640*7c568831SAndroid Build Coastguard Worker }
3641*7c568831SAndroid Build Coastguard Worker if ((http) && (content != NULL))
3642*7c568831SAndroid Build Coastguard Worker htmlCheckEncoding(ctxt, content);
3643*7c568831SAndroid Build Coastguard Worker
3644*7c568831SAndroid Build Coastguard Worker }
3645*7c568831SAndroid Build Coastguard Worker
3646*7c568831SAndroid Build Coastguard Worker /**
3647*7c568831SAndroid Build Coastguard Worker * htmlAttrHashInsert:
3648*7c568831SAndroid Build Coastguard Worker * @ctxt: parser context
3649*7c568831SAndroid Build Coastguard Worker * @size: size of the hash table
3650*7c568831SAndroid Build Coastguard Worker * @name: attribute name
3651*7c568831SAndroid Build Coastguard Worker * @hashValue: hash value of name
3652*7c568831SAndroid Build Coastguard Worker * @aindex: attribute index (this is a multiple of 5)
3653*7c568831SAndroid Build Coastguard Worker *
3654*7c568831SAndroid Build Coastguard Worker * Inserts a new attribute into the hash table.
3655*7c568831SAndroid Build Coastguard Worker *
3656*7c568831SAndroid Build Coastguard Worker * Returns INT_MAX if no existing attribute was found, the attribute
3657*7c568831SAndroid Build Coastguard Worker * index if an attribute was found, -1 if a memory allocation failed.
3658*7c568831SAndroid Build Coastguard Worker */
3659*7c568831SAndroid Build Coastguard Worker static int
htmlAttrHashInsert(xmlParserCtxtPtr ctxt,unsigned size,const xmlChar * name,unsigned hashValue,int aindex)3660*7c568831SAndroid Build Coastguard Worker htmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
3661*7c568831SAndroid Build Coastguard Worker unsigned hashValue, int aindex) {
3662*7c568831SAndroid Build Coastguard Worker xmlAttrHashBucket *table = ctxt->attrHash;
3663*7c568831SAndroid Build Coastguard Worker xmlAttrHashBucket *bucket;
3664*7c568831SAndroid Build Coastguard Worker unsigned hindex;
3665*7c568831SAndroid Build Coastguard Worker
3666*7c568831SAndroid Build Coastguard Worker hindex = hashValue & (size - 1);
3667*7c568831SAndroid Build Coastguard Worker bucket = &table[hindex];
3668*7c568831SAndroid Build Coastguard Worker
3669*7c568831SAndroid Build Coastguard Worker while (bucket->index >= 0) {
3670*7c568831SAndroid Build Coastguard Worker const xmlChar **atts = &ctxt->atts[bucket->index];
3671*7c568831SAndroid Build Coastguard Worker
3672*7c568831SAndroid Build Coastguard Worker if (name == atts[0])
3673*7c568831SAndroid Build Coastguard Worker return(bucket->index);
3674*7c568831SAndroid Build Coastguard Worker
3675*7c568831SAndroid Build Coastguard Worker hindex++;
3676*7c568831SAndroid Build Coastguard Worker bucket++;
3677*7c568831SAndroid Build Coastguard Worker if (hindex >= size) {
3678*7c568831SAndroid Build Coastguard Worker hindex = 0;
3679*7c568831SAndroid Build Coastguard Worker bucket = table;
3680*7c568831SAndroid Build Coastguard Worker }
3681*7c568831SAndroid Build Coastguard Worker }
3682*7c568831SAndroid Build Coastguard Worker
3683*7c568831SAndroid Build Coastguard Worker bucket->index = aindex;
3684*7c568831SAndroid Build Coastguard Worker
3685*7c568831SAndroid Build Coastguard Worker return(INT_MAX);
3686*7c568831SAndroid Build Coastguard Worker }
3687*7c568831SAndroid Build Coastguard Worker
3688*7c568831SAndroid Build Coastguard Worker /**
3689*7c568831SAndroid Build Coastguard Worker * htmlParseStartTag:
3690*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
3691*7c568831SAndroid Build Coastguard Worker *
3692*7c568831SAndroid Build Coastguard Worker * parse a start of tag either for rule element or
3693*7c568831SAndroid Build Coastguard Worker * EmptyElement. In both case we don't parse the tag closing chars.
3694*7c568831SAndroid Build Coastguard Worker *
3695*7c568831SAndroid Build Coastguard Worker * [40] STag ::= '<' Name (S Attribute)* S? '>'
3696*7c568831SAndroid Build Coastguard Worker *
3697*7c568831SAndroid Build Coastguard Worker * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3698*7c568831SAndroid Build Coastguard Worker *
3699*7c568831SAndroid Build Coastguard Worker * With namespace:
3700*7c568831SAndroid Build Coastguard Worker *
3701*7c568831SAndroid Build Coastguard Worker * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3702*7c568831SAndroid Build Coastguard Worker *
3703*7c568831SAndroid Build Coastguard Worker * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
3704*7c568831SAndroid Build Coastguard Worker *
3705*7c568831SAndroid Build Coastguard Worker * Returns 0 in case of success, -1 in case of error and 1 if discarded
3706*7c568831SAndroid Build Coastguard Worker */
3707*7c568831SAndroid Build Coastguard Worker
3708*7c568831SAndroid Build Coastguard Worker static void
htmlParseStartTag(htmlParserCtxtPtr ctxt)3709*7c568831SAndroid Build Coastguard Worker htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3710*7c568831SAndroid Build Coastguard Worker const xmlChar *name;
3711*7c568831SAndroid Build Coastguard Worker const xmlChar *attname;
3712*7c568831SAndroid Build Coastguard Worker xmlChar *attvalue;
3713*7c568831SAndroid Build Coastguard Worker const xmlChar **atts;
3714*7c568831SAndroid Build Coastguard Worker int nbatts = 0;
3715*7c568831SAndroid Build Coastguard Worker int maxatts;
3716*7c568831SAndroid Build Coastguard Worker int meta = 0;
3717*7c568831SAndroid Build Coastguard Worker int i;
3718*7c568831SAndroid Build Coastguard Worker int discardtag = 0;
3719*7c568831SAndroid Build Coastguard Worker
3720*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = 0;
3721*7c568831SAndroid Build Coastguard Worker
3722*7c568831SAndroid Build Coastguard Worker SKIP(1);
3723*7c568831SAndroid Build Coastguard Worker
3724*7c568831SAndroid Build Coastguard Worker atts = ctxt->atts;
3725*7c568831SAndroid Build Coastguard Worker maxatts = ctxt->maxatts;
3726*7c568831SAndroid Build Coastguard Worker
3727*7c568831SAndroid Build Coastguard Worker GROW;
3728*7c568831SAndroid Build Coastguard Worker name = htmlParseHTMLName(ctxt, 0).name;
3729*7c568831SAndroid Build Coastguard Worker if (name == NULL)
3730*7c568831SAndroid Build Coastguard Worker return;
3731*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(name, BAD_CAST"meta"))
3732*7c568831SAndroid Build Coastguard Worker meta = 1;
3733*7c568831SAndroid Build Coastguard Worker
3734*7c568831SAndroid Build Coastguard Worker if ((ctxt->options & HTML_PARSE_HTML5) == 0) {
3735*7c568831SAndroid Build Coastguard Worker /*
3736*7c568831SAndroid Build Coastguard Worker * Check for auto-closure of HTML elements.
3737*7c568831SAndroid Build Coastguard Worker */
3738*7c568831SAndroid Build Coastguard Worker htmlAutoClose(ctxt, name);
3739*7c568831SAndroid Build Coastguard Worker
3740*7c568831SAndroid Build Coastguard Worker /*
3741*7c568831SAndroid Build Coastguard Worker * Check for implied HTML elements.
3742*7c568831SAndroid Build Coastguard Worker */
3743*7c568831SAndroid Build Coastguard Worker htmlCheckImplied(ctxt, name);
3744*7c568831SAndroid Build Coastguard Worker
3745*7c568831SAndroid Build Coastguard Worker /*
3746*7c568831SAndroid Build Coastguard Worker * Avoid html at any level > 0, head at any level != 1
3747*7c568831SAndroid Build Coastguard Worker * or any attempt to recurse body
3748*7c568831SAndroid Build Coastguard Worker */
3749*7c568831SAndroid Build Coastguard Worker if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
3750*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3751*7c568831SAndroid Build Coastguard Worker "htmlParseStartTag: misplaced <html> tag\n",
3752*7c568831SAndroid Build Coastguard Worker name, NULL);
3753*7c568831SAndroid Build Coastguard Worker discardtag = 1;
3754*7c568831SAndroid Build Coastguard Worker ctxt->depth++;
3755*7c568831SAndroid Build Coastguard Worker }
3756*7c568831SAndroid Build Coastguard Worker if ((ctxt->nameNr != 1) &&
3757*7c568831SAndroid Build Coastguard Worker (xmlStrEqual(name, BAD_CAST"head"))) {
3758*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3759*7c568831SAndroid Build Coastguard Worker "htmlParseStartTag: misplaced <head> tag\n",
3760*7c568831SAndroid Build Coastguard Worker name, NULL);
3761*7c568831SAndroid Build Coastguard Worker discardtag = 1;
3762*7c568831SAndroid Build Coastguard Worker ctxt->depth++;
3763*7c568831SAndroid Build Coastguard Worker }
3764*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(name, BAD_CAST"body")) {
3765*7c568831SAndroid Build Coastguard Worker int indx;
3766*7c568831SAndroid Build Coastguard Worker for (indx = 0;indx < ctxt->nameNr;indx++) {
3767*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
3768*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3769*7c568831SAndroid Build Coastguard Worker "htmlParseStartTag: misplaced <body> tag\n",
3770*7c568831SAndroid Build Coastguard Worker name, NULL);
3771*7c568831SAndroid Build Coastguard Worker discardtag = 1;
3772*7c568831SAndroid Build Coastguard Worker ctxt->depth++;
3773*7c568831SAndroid Build Coastguard Worker }
3774*7c568831SAndroid Build Coastguard Worker }
3775*7c568831SAndroid Build Coastguard Worker }
3776*7c568831SAndroid Build Coastguard Worker }
3777*7c568831SAndroid Build Coastguard Worker
3778*7c568831SAndroid Build Coastguard Worker /*
3779*7c568831SAndroid Build Coastguard Worker * Now parse the attributes, it ends up with the ending
3780*7c568831SAndroid Build Coastguard Worker *
3781*7c568831SAndroid Build Coastguard Worker * (S Attribute)* S?
3782*7c568831SAndroid Build Coastguard Worker */
3783*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3784*7c568831SAndroid Build Coastguard Worker while ((ctxt->input->cur < ctxt->input->end) &&
3785*7c568831SAndroid Build Coastguard Worker (CUR != '>') &&
3786*7c568831SAndroid Build Coastguard Worker ((CUR != '/') || (NXT(1) != '>')) &&
3787*7c568831SAndroid Build Coastguard Worker (PARSER_STOPPED(ctxt) == 0)) {
3788*7c568831SAndroid Build Coastguard Worker xmlHashedString hattname;
3789*7c568831SAndroid Build Coastguard Worker
3790*7c568831SAndroid Build Coastguard Worker /* unexpected-solidus-in-tag */
3791*7c568831SAndroid Build Coastguard Worker if (CUR == '/') {
3792*7c568831SAndroid Build Coastguard Worker SKIP(1);
3793*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3794*7c568831SAndroid Build Coastguard Worker continue;
3795*7c568831SAndroid Build Coastguard Worker }
3796*7c568831SAndroid Build Coastguard Worker GROW;
3797*7c568831SAndroid Build Coastguard Worker hattname = htmlParseAttribute(ctxt, &attvalue);
3798*7c568831SAndroid Build Coastguard Worker attname = hattname.name;
3799*7c568831SAndroid Build Coastguard Worker
3800*7c568831SAndroid Build Coastguard Worker if (attname != NULL) {
3801*7c568831SAndroid Build Coastguard Worker /*
3802*7c568831SAndroid Build Coastguard Worker * Add the pair to atts
3803*7c568831SAndroid Build Coastguard Worker */
3804*7c568831SAndroid Build Coastguard Worker if (nbatts + 4 > maxatts) {
3805*7c568831SAndroid Build Coastguard Worker const xmlChar **tmp;
3806*7c568831SAndroid Build Coastguard Worker unsigned *utmp;
3807*7c568831SAndroid Build Coastguard Worker size_t newSize = maxatts ? maxatts * 2 : 22;
3808*7c568831SAndroid Build Coastguard Worker
3809*7c568831SAndroid Build Coastguard Worker tmp = xmlMalloc(newSize * sizeof(tmp[0]));
3810*7c568831SAndroid Build Coastguard Worker if (tmp == NULL) {
3811*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
3812*7c568831SAndroid Build Coastguard Worker if (attvalue != NULL)
3813*7c568831SAndroid Build Coastguard Worker xmlFree(attvalue);
3814*7c568831SAndroid Build Coastguard Worker goto failed;
3815*7c568831SAndroid Build Coastguard Worker }
3816*7c568831SAndroid Build Coastguard Worker
3817*7c568831SAndroid Build Coastguard Worker utmp = xmlRealloc(ctxt->attallocs,
3818*7c568831SAndroid Build Coastguard Worker newSize / 2 * sizeof(utmp[0]));
3819*7c568831SAndroid Build Coastguard Worker if (utmp == NULL) {
3820*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
3821*7c568831SAndroid Build Coastguard Worker if (attvalue != NULL)
3822*7c568831SAndroid Build Coastguard Worker xmlFree(attvalue);
3823*7c568831SAndroid Build Coastguard Worker xmlFree(tmp);
3824*7c568831SAndroid Build Coastguard Worker goto failed;
3825*7c568831SAndroid Build Coastguard Worker }
3826*7c568831SAndroid Build Coastguard Worker
3827*7c568831SAndroid Build Coastguard Worker if (maxatts > 0)
3828*7c568831SAndroid Build Coastguard Worker memcpy(tmp, atts, maxatts * sizeof(tmp[0]));
3829*7c568831SAndroid Build Coastguard Worker xmlFree(atts);
3830*7c568831SAndroid Build Coastguard Worker
3831*7c568831SAndroid Build Coastguard Worker atts = tmp;
3832*7c568831SAndroid Build Coastguard Worker maxatts = newSize;
3833*7c568831SAndroid Build Coastguard Worker ctxt->atts = atts;
3834*7c568831SAndroid Build Coastguard Worker ctxt->attallocs = utmp;
3835*7c568831SAndroid Build Coastguard Worker ctxt->maxatts = maxatts;
3836*7c568831SAndroid Build Coastguard Worker }
3837*7c568831SAndroid Build Coastguard Worker
3838*7c568831SAndroid Build Coastguard Worker ctxt->attallocs[nbatts/2] = hattname.hashValue;
3839*7c568831SAndroid Build Coastguard Worker atts[nbatts++] = attname;
3840*7c568831SAndroid Build Coastguard Worker atts[nbatts++] = attvalue;
3841*7c568831SAndroid Build Coastguard Worker }
3842*7c568831SAndroid Build Coastguard Worker else {
3843*7c568831SAndroid Build Coastguard Worker if (attvalue != NULL)
3844*7c568831SAndroid Build Coastguard Worker xmlFree(attvalue);
3845*7c568831SAndroid Build Coastguard Worker }
3846*7c568831SAndroid Build Coastguard Worker
3847*7c568831SAndroid Build Coastguard Worker failed:
3848*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3849*7c568831SAndroid Build Coastguard Worker }
3850*7c568831SAndroid Build Coastguard Worker
3851*7c568831SAndroid Build Coastguard Worker if (ctxt->input->cur >= ctxt->input->end) {
3852*7c568831SAndroid Build Coastguard Worker discardtag = 1;
3853*7c568831SAndroid Build Coastguard Worker goto done;
3854*7c568831SAndroid Build Coastguard Worker }
3855*7c568831SAndroid Build Coastguard Worker
3856*7c568831SAndroid Build Coastguard Worker /*
3857*7c568831SAndroid Build Coastguard Worker * Verify that attribute names are unique.
3858*7c568831SAndroid Build Coastguard Worker */
3859*7c568831SAndroid Build Coastguard Worker if (nbatts > 2) {
3860*7c568831SAndroid Build Coastguard Worker unsigned attrHashSize;
3861*7c568831SAndroid Build Coastguard Worker int j, k;
3862*7c568831SAndroid Build Coastguard Worker
3863*7c568831SAndroid Build Coastguard Worker attrHashSize = 4;
3864*7c568831SAndroid Build Coastguard Worker while (attrHashSize / 2 < (unsigned) nbatts / 2)
3865*7c568831SAndroid Build Coastguard Worker attrHashSize *= 2;
3866*7c568831SAndroid Build Coastguard Worker
3867*7c568831SAndroid Build Coastguard Worker if (attrHashSize > ctxt->attrHashMax) {
3868*7c568831SAndroid Build Coastguard Worker xmlAttrHashBucket *tmp;
3869*7c568831SAndroid Build Coastguard Worker
3870*7c568831SAndroid Build Coastguard Worker tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
3871*7c568831SAndroid Build Coastguard Worker if (tmp == NULL) {
3872*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
3873*7c568831SAndroid Build Coastguard Worker goto done;
3874*7c568831SAndroid Build Coastguard Worker }
3875*7c568831SAndroid Build Coastguard Worker
3876*7c568831SAndroid Build Coastguard Worker ctxt->attrHash = tmp;
3877*7c568831SAndroid Build Coastguard Worker ctxt->attrHashMax = attrHashSize;
3878*7c568831SAndroid Build Coastguard Worker }
3879*7c568831SAndroid Build Coastguard Worker
3880*7c568831SAndroid Build Coastguard Worker memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
3881*7c568831SAndroid Build Coastguard Worker
3882*7c568831SAndroid Build Coastguard Worker for (i = 0, j = 0, k = 0; i < nbatts; i += 2, k++) {
3883*7c568831SAndroid Build Coastguard Worker unsigned hashValue;
3884*7c568831SAndroid Build Coastguard Worker int res;
3885*7c568831SAndroid Build Coastguard Worker
3886*7c568831SAndroid Build Coastguard Worker attname = atts[i];
3887*7c568831SAndroid Build Coastguard Worker hashValue = ctxt->attallocs[k] | 0x80000000;
3888*7c568831SAndroid Build Coastguard Worker
3889*7c568831SAndroid Build Coastguard Worker res = htmlAttrHashInsert(ctxt, attrHashSize, attname,
3890*7c568831SAndroid Build Coastguard Worker hashValue, j);
3891*7c568831SAndroid Build Coastguard Worker if (res < 0)
3892*7c568831SAndroid Build Coastguard Worker continue;
3893*7c568831SAndroid Build Coastguard Worker
3894*7c568831SAndroid Build Coastguard Worker if (res == INT_MAX) {
3895*7c568831SAndroid Build Coastguard Worker atts[j] = atts[i];
3896*7c568831SAndroid Build Coastguard Worker atts[j+1] = atts[i+1];
3897*7c568831SAndroid Build Coastguard Worker j += 2;
3898*7c568831SAndroid Build Coastguard Worker } else {
3899*7c568831SAndroid Build Coastguard Worker xmlFree((xmlChar *) atts[i+1]);
3900*7c568831SAndroid Build Coastguard Worker }
3901*7c568831SAndroid Build Coastguard Worker }
3902*7c568831SAndroid Build Coastguard Worker
3903*7c568831SAndroid Build Coastguard Worker nbatts = j;
3904*7c568831SAndroid Build Coastguard Worker }
3905*7c568831SAndroid Build Coastguard Worker
3906*7c568831SAndroid Build Coastguard Worker if (nbatts > 0) {
3907*7c568831SAndroid Build Coastguard Worker atts[nbatts] = NULL;
3908*7c568831SAndroid Build Coastguard Worker atts[nbatts + 1] = NULL;
3909*7c568831SAndroid Build Coastguard Worker
3910*7c568831SAndroid Build Coastguard Worker /*
3911*7c568831SAndroid Build Coastguard Worker * Handle specific association to the META tag
3912*7c568831SAndroid Build Coastguard Worker */
3913*7c568831SAndroid Build Coastguard Worker if (meta)
3914*7c568831SAndroid Build Coastguard Worker htmlCheckMeta(ctxt, atts);
3915*7c568831SAndroid Build Coastguard Worker }
3916*7c568831SAndroid Build Coastguard Worker
3917*7c568831SAndroid Build Coastguard Worker /*
3918*7c568831SAndroid Build Coastguard Worker * SAX: Start of Element !
3919*7c568831SAndroid Build Coastguard Worker */
3920*7c568831SAndroid Build Coastguard Worker if (!discardtag) {
3921*7c568831SAndroid Build Coastguard Worker if (ctxt->options & HTML_PARSE_HTML5) {
3922*7c568831SAndroid Build Coastguard Worker if (ctxt->nameNr > 0)
3923*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
3924*7c568831SAndroid Build Coastguard Worker }
3925*7c568831SAndroid Build Coastguard Worker
3926*7c568831SAndroid Build Coastguard Worker htmlnamePush(ctxt, name);
3927*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
3928*7c568831SAndroid Build Coastguard Worker if (nbatts != 0)
3929*7c568831SAndroid Build Coastguard Worker ctxt->sax->startElement(ctxt->userData, name, atts);
3930*7c568831SAndroid Build Coastguard Worker else
3931*7c568831SAndroid Build Coastguard Worker ctxt->sax->startElement(ctxt->userData, name, NULL);
3932*7c568831SAndroid Build Coastguard Worker }
3933*7c568831SAndroid Build Coastguard Worker }
3934*7c568831SAndroid Build Coastguard Worker
3935*7c568831SAndroid Build Coastguard Worker done:
3936*7c568831SAndroid Build Coastguard Worker if (atts != NULL) {
3937*7c568831SAndroid Build Coastguard Worker for (i = 1;i < nbatts;i += 2) {
3938*7c568831SAndroid Build Coastguard Worker if (atts[i] != NULL)
3939*7c568831SAndroid Build Coastguard Worker xmlFree((xmlChar *) atts[i]);
3940*7c568831SAndroid Build Coastguard Worker }
3941*7c568831SAndroid Build Coastguard Worker }
3942*7c568831SAndroid Build Coastguard Worker }
3943*7c568831SAndroid Build Coastguard Worker
3944*7c568831SAndroid Build Coastguard Worker /**
3945*7c568831SAndroid Build Coastguard Worker * htmlParseEndTag:
3946*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
3947*7c568831SAndroid Build Coastguard Worker *
3948*7c568831SAndroid Build Coastguard Worker * parse an end of tag
3949*7c568831SAndroid Build Coastguard Worker *
3950*7c568831SAndroid Build Coastguard Worker * [42] ETag ::= '</' Name S? '>'
3951*7c568831SAndroid Build Coastguard Worker *
3952*7c568831SAndroid Build Coastguard Worker * With namespace
3953*7c568831SAndroid Build Coastguard Worker *
3954*7c568831SAndroid Build Coastguard Worker * [NS 9] ETag ::= '</' QName S? '>'
3955*7c568831SAndroid Build Coastguard Worker *
3956*7c568831SAndroid Build Coastguard Worker * Returns 1 if the current level should be closed.
3957*7c568831SAndroid Build Coastguard Worker */
3958*7c568831SAndroid Build Coastguard Worker
3959*7c568831SAndroid Build Coastguard Worker static void
htmlParseEndTag(htmlParserCtxtPtr ctxt)3960*7c568831SAndroid Build Coastguard Worker htmlParseEndTag(htmlParserCtxtPtr ctxt)
3961*7c568831SAndroid Build Coastguard Worker {
3962*7c568831SAndroid Build Coastguard Worker const xmlChar *name;
3963*7c568831SAndroid Build Coastguard Worker const xmlChar *oldname;
3964*7c568831SAndroid Build Coastguard Worker int i;
3965*7c568831SAndroid Build Coastguard Worker
3966*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = 0;
3967*7c568831SAndroid Build Coastguard Worker
3968*7c568831SAndroid Build Coastguard Worker SKIP(2);
3969*7c568831SAndroid Build Coastguard Worker
3970*7c568831SAndroid Build Coastguard Worker if (CUR == '>') {
3971*7c568831SAndroid Build Coastguard Worker SKIP(1);
3972*7c568831SAndroid Build Coastguard Worker return;
3973*7c568831SAndroid Build Coastguard Worker }
3974*7c568831SAndroid Build Coastguard Worker
3975*7c568831SAndroid Build Coastguard Worker if (!IS_ASCII_LETTER(CUR)) {
3976*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 1);
3977*7c568831SAndroid Build Coastguard Worker return;
3978*7c568831SAndroid Build Coastguard Worker }
3979*7c568831SAndroid Build Coastguard Worker
3980*7c568831SAndroid Build Coastguard Worker name = htmlParseHTMLName(ctxt, 0).name;
3981*7c568831SAndroid Build Coastguard Worker if (name == NULL)
3982*7c568831SAndroid Build Coastguard Worker return;
3983*7c568831SAndroid Build Coastguard Worker
3984*7c568831SAndroid Build Coastguard Worker /*
3985*7c568831SAndroid Build Coastguard Worker * Parse and ignore attributes.
3986*7c568831SAndroid Build Coastguard Worker */
3987*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3988*7c568831SAndroid Build Coastguard Worker while ((ctxt->input->cur < ctxt->input->end) &&
3989*7c568831SAndroid Build Coastguard Worker (CUR != '>') &&
3990*7c568831SAndroid Build Coastguard Worker ((CUR != '/') || (NXT(1) != '>')) &&
3991*7c568831SAndroid Build Coastguard Worker (ctxt->instate != XML_PARSER_EOF)) {
3992*7c568831SAndroid Build Coastguard Worker xmlChar *attvalue = NULL;
3993*7c568831SAndroid Build Coastguard Worker
3994*7c568831SAndroid Build Coastguard Worker /* unexpected-solidus-in-tag */
3995*7c568831SAndroid Build Coastguard Worker if (CUR == '/') {
3996*7c568831SAndroid Build Coastguard Worker SKIP(1);
3997*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
3998*7c568831SAndroid Build Coastguard Worker continue;
3999*7c568831SAndroid Build Coastguard Worker }
4000*7c568831SAndroid Build Coastguard Worker GROW;
4001*7c568831SAndroid Build Coastguard Worker htmlParseAttribute(ctxt, &attvalue);
4002*7c568831SAndroid Build Coastguard Worker if (attvalue != NULL)
4003*7c568831SAndroid Build Coastguard Worker xmlFree(attvalue);
4004*7c568831SAndroid Build Coastguard Worker
4005*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
4006*7c568831SAndroid Build Coastguard Worker }
4007*7c568831SAndroid Build Coastguard Worker
4008*7c568831SAndroid Build Coastguard Worker if (CUR == '>') {
4009*7c568831SAndroid Build Coastguard Worker SKIP(1);
4010*7c568831SAndroid Build Coastguard Worker } else if ((CUR == '/') && (NXT(1) == '>')) {
4011*7c568831SAndroid Build Coastguard Worker SKIP(2);
4012*7c568831SAndroid Build Coastguard Worker } else {
4013*7c568831SAndroid Build Coastguard Worker return;
4014*7c568831SAndroid Build Coastguard Worker }
4015*7c568831SAndroid Build Coastguard Worker
4016*7c568831SAndroid Build Coastguard Worker if (ctxt->options & HTML_PARSE_HTML5) {
4017*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4018*7c568831SAndroid Build Coastguard Worker ctxt->sax->endElement(ctxt->userData, name);
4019*7c568831SAndroid Build Coastguard Worker return;
4020*7c568831SAndroid Build Coastguard Worker }
4021*7c568831SAndroid Build Coastguard Worker
4022*7c568831SAndroid Build Coastguard Worker /*
4023*7c568831SAndroid Build Coastguard Worker * if we ignored misplaced tags in htmlParseStartTag don't pop them
4024*7c568831SAndroid Build Coastguard Worker * out now.
4025*7c568831SAndroid Build Coastguard Worker */
4026*7c568831SAndroid Build Coastguard Worker if ((ctxt->depth > 0) &&
4027*7c568831SAndroid Build Coastguard Worker (xmlStrEqual(name, BAD_CAST "html") ||
4028*7c568831SAndroid Build Coastguard Worker xmlStrEqual(name, BAD_CAST "body") ||
4029*7c568831SAndroid Build Coastguard Worker xmlStrEqual(name, BAD_CAST "head"))) {
4030*7c568831SAndroid Build Coastguard Worker ctxt->depth--;
4031*7c568831SAndroid Build Coastguard Worker return;
4032*7c568831SAndroid Build Coastguard Worker }
4033*7c568831SAndroid Build Coastguard Worker
4034*7c568831SAndroid Build Coastguard Worker /*
4035*7c568831SAndroid Build Coastguard Worker * If the name read is not one of the element in the parsing stack
4036*7c568831SAndroid Build Coastguard Worker * then return, it's just an error.
4037*7c568831SAndroid Build Coastguard Worker */
4038*7c568831SAndroid Build Coastguard Worker for (i = (ctxt->nameNr - 1); i >= 0; i--) {
4039*7c568831SAndroid Build Coastguard Worker if (xmlStrEqual(name, ctxt->nameTab[i]))
4040*7c568831SAndroid Build Coastguard Worker break;
4041*7c568831SAndroid Build Coastguard Worker }
4042*7c568831SAndroid Build Coastguard Worker if (i < 0) {
4043*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
4044*7c568831SAndroid Build Coastguard Worker "Unexpected end tag : %s\n", name, NULL);
4045*7c568831SAndroid Build Coastguard Worker return;
4046*7c568831SAndroid Build Coastguard Worker }
4047*7c568831SAndroid Build Coastguard Worker
4048*7c568831SAndroid Build Coastguard Worker
4049*7c568831SAndroid Build Coastguard Worker /*
4050*7c568831SAndroid Build Coastguard Worker * Check for auto-closure of HTML elements.
4051*7c568831SAndroid Build Coastguard Worker */
4052*7c568831SAndroid Build Coastguard Worker
4053*7c568831SAndroid Build Coastguard Worker htmlAutoCloseOnClose(ctxt, name);
4054*7c568831SAndroid Build Coastguard Worker
4055*7c568831SAndroid Build Coastguard Worker /*
4056*7c568831SAndroid Build Coastguard Worker * Well formedness constraints, opening and closing must match.
4057*7c568831SAndroid Build Coastguard Worker * With the exception that the autoclose may have popped stuff out
4058*7c568831SAndroid Build Coastguard Worker * of the stack.
4059*7c568831SAndroid Build Coastguard Worker */
4060*7c568831SAndroid Build Coastguard Worker if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
4061*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
4062*7c568831SAndroid Build Coastguard Worker "Opening and ending tag mismatch: %s and %s\n",
4063*7c568831SAndroid Build Coastguard Worker name, ctxt->name);
4064*7c568831SAndroid Build Coastguard Worker }
4065*7c568831SAndroid Build Coastguard Worker
4066*7c568831SAndroid Build Coastguard Worker /*
4067*7c568831SAndroid Build Coastguard Worker * SAX: End of Tag
4068*7c568831SAndroid Build Coastguard Worker */
4069*7c568831SAndroid Build Coastguard Worker oldname = ctxt->name;
4070*7c568831SAndroid Build Coastguard Worker if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
4071*7c568831SAndroid Build Coastguard Worker htmlParserFinishElementParsing(ctxt);
4072*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4073*7c568831SAndroid Build Coastguard Worker ctxt->sax->endElement(ctxt->userData, name);
4074*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
4075*7c568831SAndroid Build Coastguard Worker }
4076*7c568831SAndroid Build Coastguard Worker }
4077*7c568831SAndroid Build Coastguard Worker
4078*7c568831SAndroid Build Coastguard Worker /**
4079*7c568831SAndroid Build Coastguard Worker * htmlParseContent:
4080*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
4081*7c568831SAndroid Build Coastguard Worker *
4082*7c568831SAndroid Build Coastguard Worker * Parse a content: comment, sub-element, reference or text.
4083*7c568831SAndroid Build Coastguard Worker * New version for non recursive htmlParseElementInternal
4084*7c568831SAndroid Build Coastguard Worker */
4085*7c568831SAndroid Build Coastguard Worker
4086*7c568831SAndroid Build Coastguard Worker static void
htmlParseContent(htmlParserCtxtPtr ctxt)4087*7c568831SAndroid Build Coastguard Worker htmlParseContent(htmlParserCtxtPtr ctxt) {
4088*7c568831SAndroid Build Coastguard Worker while ((PARSER_STOPPED(ctxt) == 0) &&
4089*7c568831SAndroid Build Coastguard Worker (ctxt->input->cur < ctxt->input->end)) {
4090*7c568831SAndroid Build Coastguard Worker int mode;
4091*7c568831SAndroid Build Coastguard Worker
4092*7c568831SAndroid Build Coastguard Worker GROW;
4093*7c568831SAndroid Build Coastguard Worker mode = ctxt->endCheckState;
4094*7c568831SAndroid Build Coastguard Worker
4095*7c568831SAndroid Build Coastguard Worker if ((mode == 0) && (CUR == '<')) {
4096*7c568831SAndroid Build Coastguard Worker if (NXT(1) == '/') {
4097*7c568831SAndroid Build Coastguard Worker htmlParseEndTag(ctxt);
4098*7c568831SAndroid Build Coastguard Worker } else if (NXT(1) == '!') {
4099*7c568831SAndroid Build Coastguard Worker /*
4100*7c568831SAndroid Build Coastguard Worker * Sometimes DOCTYPE arrives in the middle of the document
4101*7c568831SAndroid Build Coastguard Worker */
4102*7c568831SAndroid Build Coastguard Worker if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
4103*7c568831SAndroid Build Coastguard Worker (UPP(4) == 'C') && (UPP(5) == 'T') &&
4104*7c568831SAndroid Build Coastguard Worker (UPP(6) == 'Y') && (UPP(7) == 'P') &&
4105*7c568831SAndroid Build Coastguard Worker (UPP(8) == 'E')) {
4106*7c568831SAndroid Build Coastguard Worker htmlParseDocTypeDecl(ctxt);
4107*7c568831SAndroid Build Coastguard Worker } else if ((NXT(2) == '-') && (NXT(3) == '-')) {
4108*7c568831SAndroid Build Coastguard Worker SKIP(4);
4109*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 0);
4110*7c568831SAndroid Build Coastguard Worker } else {
4111*7c568831SAndroid Build Coastguard Worker SKIP(2);
4112*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 1);
4113*7c568831SAndroid Build Coastguard Worker }
4114*7c568831SAndroid Build Coastguard Worker } else if (NXT(1) == '?') {
4115*7c568831SAndroid Build Coastguard Worker SKIP(1);
4116*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 1);
4117*7c568831SAndroid Build Coastguard Worker } else if (IS_ASCII_LETTER(NXT(1))) {
4118*7c568831SAndroid Build Coastguard Worker htmlParseElementInternal(ctxt);
4119*7c568831SAndroid Build Coastguard Worker } else {
4120*7c568831SAndroid Build Coastguard Worker htmlCheckParagraph(ctxt);
4121*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4122*7c568831SAndroid Build Coastguard Worker (ctxt->sax->characters != NULL))
4123*7c568831SAndroid Build Coastguard Worker ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1);
4124*7c568831SAndroid Build Coastguard Worker SKIP(1);
4125*7c568831SAndroid Build Coastguard Worker }
4126*7c568831SAndroid Build Coastguard Worker } else {
4127*7c568831SAndroid Build Coastguard Worker htmlParseCharData(ctxt);
4128*7c568831SAndroid Build Coastguard Worker }
4129*7c568831SAndroid Build Coastguard Worker
4130*7c568831SAndroid Build Coastguard Worker SHRINK;
4131*7c568831SAndroid Build Coastguard Worker GROW;
4132*7c568831SAndroid Build Coastguard Worker }
4133*7c568831SAndroid Build Coastguard Worker
4134*7c568831SAndroid Build Coastguard Worker if (ctxt->input->cur >= ctxt->input->end)
4135*7c568831SAndroid Build Coastguard Worker htmlAutoCloseOnEnd(ctxt);
4136*7c568831SAndroid Build Coastguard Worker }
4137*7c568831SAndroid Build Coastguard Worker
4138*7c568831SAndroid Build Coastguard Worker /**
4139*7c568831SAndroid Build Coastguard Worker * htmlParseElementInternal:
4140*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
4141*7c568831SAndroid Build Coastguard Worker *
4142*7c568831SAndroid Build Coastguard Worker * parse an HTML element, new version, non recursive
4143*7c568831SAndroid Build Coastguard Worker *
4144*7c568831SAndroid Build Coastguard Worker * [39] element ::= EmptyElemTag | STag content ETag
4145*7c568831SAndroid Build Coastguard Worker *
4146*7c568831SAndroid Build Coastguard Worker * [41] Attribute ::= Name Eq AttValue
4147*7c568831SAndroid Build Coastguard Worker */
4148*7c568831SAndroid Build Coastguard Worker
4149*7c568831SAndroid Build Coastguard Worker static int
htmlParseElementInternal(htmlParserCtxtPtr ctxt)4150*7c568831SAndroid Build Coastguard Worker htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
4151*7c568831SAndroid Build Coastguard Worker const xmlChar *name;
4152*7c568831SAndroid Build Coastguard Worker const htmlElemDesc * info;
4153*7c568831SAndroid Build Coastguard Worker htmlParserNodeInfo node_info = { NULL, 0, 0, 0, 0 };
4154*7c568831SAndroid Build Coastguard Worker
4155*7c568831SAndroid Build Coastguard Worker if ((ctxt == NULL) || (ctxt->input == NULL))
4156*7c568831SAndroid Build Coastguard Worker return(0);
4157*7c568831SAndroid Build Coastguard Worker
4158*7c568831SAndroid Build Coastguard Worker /* Capture start position */
4159*7c568831SAndroid Build Coastguard Worker if (ctxt->record_info) {
4160*7c568831SAndroid Build Coastguard Worker node_info.begin_pos = ctxt->input->consumed +
4161*7c568831SAndroid Build Coastguard Worker (CUR_PTR - ctxt->input->base);
4162*7c568831SAndroid Build Coastguard Worker node_info.begin_line = ctxt->input->line;
4163*7c568831SAndroid Build Coastguard Worker }
4164*7c568831SAndroid Build Coastguard Worker
4165*7c568831SAndroid Build Coastguard Worker htmlParseStartTag(ctxt);
4166*7c568831SAndroid Build Coastguard Worker name = ctxt->name;
4167*7c568831SAndroid Build Coastguard Worker if (name == NULL)
4168*7c568831SAndroid Build Coastguard Worker return(0);
4169*7c568831SAndroid Build Coastguard Worker
4170*7c568831SAndroid Build Coastguard Worker if (ctxt->record_info)
4171*7c568831SAndroid Build Coastguard Worker htmlNodeInfoPush(ctxt, &node_info);
4172*7c568831SAndroid Build Coastguard Worker
4173*7c568831SAndroid Build Coastguard Worker /*
4174*7c568831SAndroid Build Coastguard Worker * Check for an Empty Element labeled the XML/SGML way
4175*7c568831SAndroid Build Coastguard Worker */
4176*7c568831SAndroid Build Coastguard Worker if ((CUR == '/') && (NXT(1) == '>')) {
4177*7c568831SAndroid Build Coastguard Worker SKIP(2);
4178*7c568831SAndroid Build Coastguard Worker htmlParserFinishElementParsing(ctxt);
4179*7c568831SAndroid Build Coastguard Worker if ((ctxt->options & HTML_PARSE_HTML5) == 0) {
4180*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4181*7c568831SAndroid Build Coastguard Worker ctxt->sax->endElement(ctxt->userData, name);
4182*7c568831SAndroid Build Coastguard Worker }
4183*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
4184*7c568831SAndroid Build Coastguard Worker return(0);
4185*7c568831SAndroid Build Coastguard Worker }
4186*7c568831SAndroid Build Coastguard Worker
4187*7c568831SAndroid Build Coastguard Worker if (CUR != '>')
4188*7c568831SAndroid Build Coastguard Worker return(0);
4189*7c568831SAndroid Build Coastguard Worker SKIP(1);
4190*7c568831SAndroid Build Coastguard Worker
4191*7c568831SAndroid Build Coastguard Worker /*
4192*7c568831SAndroid Build Coastguard Worker * Lookup the info for that element.
4193*7c568831SAndroid Build Coastguard Worker */
4194*7c568831SAndroid Build Coastguard Worker info = htmlTagLookup(name);
4195*7c568831SAndroid Build Coastguard Worker
4196*7c568831SAndroid Build Coastguard Worker /*
4197*7c568831SAndroid Build Coastguard Worker * Check for an Empty Element from DTD definition
4198*7c568831SAndroid Build Coastguard Worker */
4199*7c568831SAndroid Build Coastguard Worker if ((info != NULL) && (info->empty)) {
4200*7c568831SAndroid Build Coastguard Worker htmlParserFinishElementParsing(ctxt);
4201*7c568831SAndroid Build Coastguard Worker if ((ctxt->options & HTML_PARSE_HTML5) == 0) {
4202*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4203*7c568831SAndroid Build Coastguard Worker ctxt->sax->endElement(ctxt->userData, name);
4204*7c568831SAndroid Build Coastguard Worker }
4205*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
4206*7c568831SAndroid Build Coastguard Worker return(0);
4207*7c568831SAndroid Build Coastguard Worker }
4208*7c568831SAndroid Build Coastguard Worker
4209*7c568831SAndroid Build Coastguard Worker if (info != NULL)
4210*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = info->dataMode;
4211*7c568831SAndroid Build Coastguard Worker
4212*7c568831SAndroid Build Coastguard Worker return(1);
4213*7c568831SAndroid Build Coastguard Worker }
4214*7c568831SAndroid Build Coastguard Worker
4215*7c568831SAndroid Build Coastguard Worker /**
4216*7c568831SAndroid Build Coastguard Worker * htmlParseElement:
4217*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
4218*7c568831SAndroid Build Coastguard Worker *
4219*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Internal function, don't use.
4220*7c568831SAndroid Build Coastguard Worker *
4221*7c568831SAndroid Build Coastguard Worker * parse an HTML element, this is highly recursive
4222*7c568831SAndroid Build Coastguard Worker * this is kept for compatibility with previous code versions
4223*7c568831SAndroid Build Coastguard Worker *
4224*7c568831SAndroid Build Coastguard Worker * [39] element ::= EmptyElemTag | STag content ETag
4225*7c568831SAndroid Build Coastguard Worker *
4226*7c568831SAndroid Build Coastguard Worker * [41] Attribute ::= Name Eq AttValue
4227*7c568831SAndroid Build Coastguard Worker */
4228*7c568831SAndroid Build Coastguard Worker
4229*7c568831SAndroid Build Coastguard Worker void
htmlParseElement(htmlParserCtxtPtr ctxt)4230*7c568831SAndroid Build Coastguard Worker htmlParseElement(htmlParserCtxtPtr ctxt) {
4231*7c568831SAndroid Build Coastguard Worker const xmlChar *oldptr;
4232*7c568831SAndroid Build Coastguard Worker int depth;
4233*7c568831SAndroid Build Coastguard Worker
4234*7c568831SAndroid Build Coastguard Worker if ((ctxt == NULL) || (ctxt->input == NULL))
4235*7c568831SAndroid Build Coastguard Worker return;
4236*7c568831SAndroid Build Coastguard Worker
4237*7c568831SAndroid Build Coastguard Worker if (htmlParseElementInternal(ctxt) == 0)
4238*7c568831SAndroid Build Coastguard Worker return;
4239*7c568831SAndroid Build Coastguard Worker
4240*7c568831SAndroid Build Coastguard Worker /*
4241*7c568831SAndroid Build Coastguard Worker * Parse the content of the element:
4242*7c568831SAndroid Build Coastguard Worker */
4243*7c568831SAndroid Build Coastguard Worker depth = ctxt->nameNr;
4244*7c568831SAndroid Build Coastguard Worker while (CUR != 0) {
4245*7c568831SAndroid Build Coastguard Worker oldptr = ctxt->input->cur;
4246*7c568831SAndroid Build Coastguard Worker htmlParseContent(ctxt);
4247*7c568831SAndroid Build Coastguard Worker if (oldptr==ctxt->input->cur) break;
4248*7c568831SAndroid Build Coastguard Worker if (ctxt->nameNr < depth) break;
4249*7c568831SAndroid Build Coastguard Worker }
4250*7c568831SAndroid Build Coastguard Worker
4251*7c568831SAndroid Build Coastguard Worker if (CUR == 0) {
4252*7c568831SAndroid Build Coastguard Worker htmlAutoCloseOnEnd(ctxt);
4253*7c568831SAndroid Build Coastguard Worker }
4254*7c568831SAndroid Build Coastguard Worker }
4255*7c568831SAndroid Build Coastguard Worker
4256*7c568831SAndroid Build Coastguard Worker xmlNodePtr
htmlCtxtParseContentInternal(htmlParserCtxtPtr ctxt,xmlParserInputPtr input)4257*7c568831SAndroid Build Coastguard Worker htmlCtxtParseContentInternal(htmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
4258*7c568831SAndroid Build Coastguard Worker xmlNodePtr root;
4259*7c568831SAndroid Build Coastguard Worker xmlNodePtr list = NULL;
4260*7c568831SAndroid Build Coastguard Worker xmlChar *rootName = BAD_CAST "#root";
4261*7c568831SAndroid Build Coastguard Worker
4262*7c568831SAndroid Build Coastguard Worker root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
4263*7c568831SAndroid Build Coastguard Worker if (root == NULL) {
4264*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
4265*7c568831SAndroid Build Coastguard Worker return(NULL);
4266*7c568831SAndroid Build Coastguard Worker }
4267*7c568831SAndroid Build Coastguard Worker
4268*7c568831SAndroid Build Coastguard Worker if (xmlPushInput(ctxt, input) < 0) {
4269*7c568831SAndroid Build Coastguard Worker xmlFreeNode(root);
4270*7c568831SAndroid Build Coastguard Worker return(NULL);
4271*7c568831SAndroid Build Coastguard Worker }
4272*7c568831SAndroid Build Coastguard Worker
4273*7c568831SAndroid Build Coastguard Worker htmlnamePush(ctxt, rootName);
4274*7c568831SAndroid Build Coastguard Worker nodePush(ctxt, root);
4275*7c568831SAndroid Build Coastguard Worker
4276*7c568831SAndroid Build Coastguard Worker htmlParseContent(ctxt);
4277*7c568831SAndroid Build Coastguard Worker
4278*7c568831SAndroid Build Coastguard Worker /* TODO: Use xmlCtxtIsCatastrophicError */
4279*7c568831SAndroid Build Coastguard Worker if (ctxt->errNo != XML_ERR_NO_MEMORY) {
4280*7c568831SAndroid Build Coastguard Worker xmlNodePtr cur;
4281*7c568831SAndroid Build Coastguard Worker
4282*7c568831SAndroid Build Coastguard Worker /*
4283*7c568831SAndroid Build Coastguard Worker * Unlink newly created node list.
4284*7c568831SAndroid Build Coastguard Worker */
4285*7c568831SAndroid Build Coastguard Worker list = root->children;
4286*7c568831SAndroid Build Coastguard Worker root->children = NULL;
4287*7c568831SAndroid Build Coastguard Worker root->last = NULL;
4288*7c568831SAndroid Build Coastguard Worker for (cur = list; cur != NULL; cur = cur->next)
4289*7c568831SAndroid Build Coastguard Worker cur->parent = NULL;
4290*7c568831SAndroid Build Coastguard Worker }
4291*7c568831SAndroid Build Coastguard Worker
4292*7c568831SAndroid Build Coastguard Worker nodePop(ctxt);
4293*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
4294*7c568831SAndroid Build Coastguard Worker
4295*7c568831SAndroid Build Coastguard Worker /* xmlPopInput would free the stream */
4296*7c568831SAndroid Build Coastguard Worker inputPop(ctxt);
4297*7c568831SAndroid Build Coastguard Worker
4298*7c568831SAndroid Build Coastguard Worker xmlFreeNode(root);
4299*7c568831SAndroid Build Coastguard Worker return(list);
4300*7c568831SAndroid Build Coastguard Worker }
4301*7c568831SAndroid Build Coastguard Worker
4302*7c568831SAndroid Build Coastguard Worker /**
4303*7c568831SAndroid Build Coastguard Worker * htmlParseDocument:
4304*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
4305*7c568831SAndroid Build Coastguard Worker *
4306*7c568831SAndroid Build Coastguard Worker * Parse an HTML document and invoke the SAX handlers. This is useful
4307*7c568831SAndroid Build Coastguard Worker * if you're only interested in custom SAX callbacks. If you want a
4308*7c568831SAndroid Build Coastguard Worker * document tree, use htmlCtxtParseDocument.
4309*7c568831SAndroid Build Coastguard Worker *
4310*7c568831SAndroid Build Coastguard Worker * Returns 0, -1 in case of error.
4311*7c568831SAndroid Build Coastguard Worker */
4312*7c568831SAndroid Build Coastguard Worker
4313*7c568831SAndroid Build Coastguard Worker int
htmlParseDocument(htmlParserCtxtPtr ctxt)4314*7c568831SAndroid Build Coastguard Worker htmlParseDocument(htmlParserCtxtPtr ctxt) {
4315*7c568831SAndroid Build Coastguard Worker xmlDtdPtr dtd;
4316*7c568831SAndroid Build Coastguard Worker
4317*7c568831SAndroid Build Coastguard Worker if ((ctxt == NULL) || (ctxt->input == NULL))
4318*7c568831SAndroid Build Coastguard Worker return(-1);
4319*7c568831SAndroid Build Coastguard Worker
4320*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
4321*7c568831SAndroid Build Coastguard Worker ctxt->sax->setDocumentLocator(ctxt->userData,
4322*7c568831SAndroid Build Coastguard Worker (xmlSAXLocator *) &xmlDefaultSAXLocator);
4323*7c568831SAndroid Build Coastguard Worker }
4324*7c568831SAndroid Build Coastguard Worker
4325*7c568831SAndroid Build Coastguard Worker xmlDetectEncoding(ctxt);
4326*7c568831SAndroid Build Coastguard Worker
4327*7c568831SAndroid Build Coastguard Worker /*
4328*7c568831SAndroid Build Coastguard Worker * This is wrong but matches long-standing behavior. In most cases,
4329*7c568831SAndroid Build Coastguard Worker * a document starting with an XML declaration will specify UTF-8.
4330*7c568831SAndroid Build Coastguard Worker */
4331*7c568831SAndroid Build Coastguard Worker if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
4332*7c568831SAndroid Build Coastguard Worker (xmlStrncmp(ctxt->input->cur, BAD_CAST "<?xm", 4) == 0))
4333*7c568831SAndroid Build Coastguard Worker xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF8);
4334*7c568831SAndroid Build Coastguard Worker
4335*7c568831SAndroid Build Coastguard Worker /*
4336*7c568831SAndroid Build Coastguard Worker * Wipe out everything which is before the first '<'
4337*7c568831SAndroid Build Coastguard Worker */
4338*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
4339*7c568831SAndroid Build Coastguard Worker
4340*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4341*7c568831SAndroid Build Coastguard Worker ctxt->sax->startDocument(ctxt->userData);
4342*7c568831SAndroid Build Coastguard Worker
4343*7c568831SAndroid Build Coastguard Worker /*
4344*7c568831SAndroid Build Coastguard Worker * Parse possible comments and PIs before any content
4345*7c568831SAndroid Build Coastguard Worker */
4346*7c568831SAndroid Build Coastguard Worker while (CUR == '<') {
4347*7c568831SAndroid Build Coastguard Worker if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) {
4348*7c568831SAndroid Build Coastguard Worker SKIP(4);
4349*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 0);
4350*7c568831SAndroid Build Coastguard Worker } else if (NXT(1) == '?') {
4351*7c568831SAndroid Build Coastguard Worker SKIP(1);
4352*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 1);
4353*7c568831SAndroid Build Coastguard Worker } else {
4354*7c568831SAndroid Build Coastguard Worker break;
4355*7c568831SAndroid Build Coastguard Worker }
4356*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
4357*7c568831SAndroid Build Coastguard Worker }
4358*7c568831SAndroid Build Coastguard Worker
4359*7c568831SAndroid Build Coastguard Worker /*
4360*7c568831SAndroid Build Coastguard Worker * Then possibly doc type declaration(s) and more Misc
4361*7c568831SAndroid Build Coastguard Worker * (doctypedecl Misc*)?
4362*7c568831SAndroid Build Coastguard Worker */
4363*7c568831SAndroid Build Coastguard Worker if ((CUR == '<') && (NXT(1) == '!') &&
4364*7c568831SAndroid Build Coastguard Worker (UPP(2) == 'D') && (UPP(3) == 'O') &&
4365*7c568831SAndroid Build Coastguard Worker (UPP(4) == 'C') && (UPP(5) == 'T') &&
4366*7c568831SAndroid Build Coastguard Worker (UPP(6) == 'Y') && (UPP(7) == 'P') &&
4367*7c568831SAndroid Build Coastguard Worker (UPP(8) == 'E')) {
4368*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_MISC;
4369*7c568831SAndroid Build Coastguard Worker htmlParseDocTypeDecl(ctxt);
4370*7c568831SAndroid Build Coastguard Worker }
4371*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
4372*7c568831SAndroid Build Coastguard Worker
4373*7c568831SAndroid Build Coastguard Worker /*
4374*7c568831SAndroid Build Coastguard Worker * Parse possible comments and PIs before any content
4375*7c568831SAndroid Build Coastguard Worker */
4376*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_PROLOG;
4377*7c568831SAndroid Build Coastguard Worker while (CUR == '<') {
4378*7c568831SAndroid Build Coastguard Worker if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) {
4379*7c568831SAndroid Build Coastguard Worker SKIP(4);
4380*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 0);
4381*7c568831SAndroid Build Coastguard Worker } else if (NXT(1) == '?') {
4382*7c568831SAndroid Build Coastguard Worker SKIP(1);
4383*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 1);
4384*7c568831SAndroid Build Coastguard Worker } else {
4385*7c568831SAndroid Build Coastguard Worker break;
4386*7c568831SAndroid Build Coastguard Worker }
4387*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
4388*7c568831SAndroid Build Coastguard Worker }
4389*7c568831SAndroid Build Coastguard Worker
4390*7c568831SAndroid Build Coastguard Worker /*
4391*7c568831SAndroid Build Coastguard Worker * Time to start parsing the tree itself
4392*7c568831SAndroid Build Coastguard Worker */
4393*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_CONTENT;
4394*7c568831SAndroid Build Coastguard Worker htmlParseContent(ctxt);
4395*7c568831SAndroid Build Coastguard Worker
4396*7c568831SAndroid Build Coastguard Worker /*
4397*7c568831SAndroid Build Coastguard Worker * autoclose
4398*7c568831SAndroid Build Coastguard Worker */
4399*7c568831SAndroid Build Coastguard Worker if (CUR == 0)
4400*7c568831SAndroid Build Coastguard Worker htmlAutoCloseOnEnd(ctxt);
4401*7c568831SAndroid Build Coastguard Worker
4402*7c568831SAndroid Build Coastguard Worker
4403*7c568831SAndroid Build Coastguard Worker /*
4404*7c568831SAndroid Build Coastguard Worker * SAX: end of the document processing.
4405*7c568831SAndroid Build Coastguard Worker */
4406*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4407*7c568831SAndroid Build Coastguard Worker ctxt->sax->endDocument(ctxt->userData);
4408*7c568831SAndroid Build Coastguard Worker
4409*7c568831SAndroid Build Coastguard Worker if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
4410*7c568831SAndroid Build Coastguard Worker dtd = xmlGetIntSubset(ctxt->myDoc);
4411*7c568831SAndroid Build Coastguard Worker if (dtd == NULL) {
4412*7c568831SAndroid Build Coastguard Worker ctxt->myDoc->intSubset =
4413*7c568831SAndroid Build Coastguard Worker xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
4414*7c568831SAndroid Build Coastguard Worker BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
4415*7c568831SAndroid Build Coastguard Worker BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
4416*7c568831SAndroid Build Coastguard Worker if (ctxt->myDoc->intSubset == NULL)
4417*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
4418*7c568831SAndroid Build Coastguard Worker }
4419*7c568831SAndroid Build Coastguard Worker }
4420*7c568831SAndroid Build Coastguard Worker if (! ctxt->wellFormed) return(-1);
4421*7c568831SAndroid Build Coastguard Worker return(0);
4422*7c568831SAndroid Build Coastguard Worker }
4423*7c568831SAndroid Build Coastguard Worker
4424*7c568831SAndroid Build Coastguard Worker
4425*7c568831SAndroid Build Coastguard Worker /************************************************************************
4426*7c568831SAndroid Build Coastguard Worker * *
4427*7c568831SAndroid Build Coastguard Worker * Parser contexts handling *
4428*7c568831SAndroid Build Coastguard Worker * *
4429*7c568831SAndroid Build Coastguard Worker ************************************************************************/
4430*7c568831SAndroid Build Coastguard Worker
4431*7c568831SAndroid Build Coastguard Worker /**
4432*7c568831SAndroid Build Coastguard Worker * htmlInitParserCtxt:
4433*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
4434*7c568831SAndroid Build Coastguard Worker * @sax: SAX handler
4435*7c568831SAndroid Build Coastguard Worker * @userData: user data
4436*7c568831SAndroid Build Coastguard Worker *
4437*7c568831SAndroid Build Coastguard Worker * Initialize a parser context
4438*7c568831SAndroid Build Coastguard Worker *
4439*7c568831SAndroid Build Coastguard Worker * Returns 0 in case of success and -1 in case of error
4440*7c568831SAndroid Build Coastguard Worker */
4441*7c568831SAndroid Build Coastguard Worker
4442*7c568831SAndroid Build Coastguard Worker static int
htmlInitParserCtxt(htmlParserCtxtPtr ctxt,const htmlSAXHandler * sax,void * userData)4443*7c568831SAndroid Build Coastguard Worker htmlInitParserCtxt(htmlParserCtxtPtr ctxt, const htmlSAXHandler *sax,
4444*7c568831SAndroid Build Coastguard Worker void *userData)
4445*7c568831SAndroid Build Coastguard Worker {
4446*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL) return(-1);
4447*7c568831SAndroid Build Coastguard Worker memset(ctxt, 0, sizeof(htmlParserCtxt));
4448*7c568831SAndroid Build Coastguard Worker
4449*7c568831SAndroid Build Coastguard Worker ctxt->dict = xmlDictCreate();
4450*7c568831SAndroid Build Coastguard Worker if (ctxt->dict == NULL)
4451*7c568831SAndroid Build Coastguard Worker return(-1);
4452*7c568831SAndroid Build Coastguard Worker
4453*7c568831SAndroid Build Coastguard Worker if (ctxt->sax == NULL)
4454*7c568831SAndroid Build Coastguard Worker ctxt->sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
4455*7c568831SAndroid Build Coastguard Worker if (ctxt->sax == NULL)
4456*7c568831SAndroid Build Coastguard Worker return(-1);
4457*7c568831SAndroid Build Coastguard Worker if (sax == NULL) {
4458*7c568831SAndroid Build Coastguard Worker memset(ctxt->sax, 0, sizeof(htmlSAXHandler));
4459*7c568831SAndroid Build Coastguard Worker xmlSAX2InitHtmlDefaultSAXHandler(ctxt->sax);
4460*7c568831SAndroid Build Coastguard Worker ctxt->userData = ctxt;
4461*7c568831SAndroid Build Coastguard Worker } else {
4462*7c568831SAndroid Build Coastguard Worker memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
4463*7c568831SAndroid Build Coastguard Worker ctxt->userData = userData ? userData : ctxt;
4464*7c568831SAndroid Build Coastguard Worker }
4465*7c568831SAndroid Build Coastguard Worker
4466*7c568831SAndroid Build Coastguard Worker /* Allocate the Input stack */
4467*7c568831SAndroid Build Coastguard Worker ctxt->inputTab = (htmlParserInputPtr *)
4468*7c568831SAndroid Build Coastguard Worker xmlMalloc(5 * sizeof(htmlParserInputPtr));
4469*7c568831SAndroid Build Coastguard Worker if (ctxt->inputTab == NULL)
4470*7c568831SAndroid Build Coastguard Worker return(-1);
4471*7c568831SAndroid Build Coastguard Worker ctxt->inputNr = 0;
4472*7c568831SAndroid Build Coastguard Worker ctxt->inputMax = 5;
4473*7c568831SAndroid Build Coastguard Worker ctxt->input = NULL;
4474*7c568831SAndroid Build Coastguard Worker ctxt->version = NULL;
4475*7c568831SAndroid Build Coastguard Worker ctxt->encoding = NULL;
4476*7c568831SAndroid Build Coastguard Worker ctxt->standalone = -1;
4477*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_START;
4478*7c568831SAndroid Build Coastguard Worker
4479*7c568831SAndroid Build Coastguard Worker /* Allocate the Node stack */
4480*7c568831SAndroid Build Coastguard Worker ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
4481*7c568831SAndroid Build Coastguard Worker if (ctxt->nodeTab == NULL)
4482*7c568831SAndroid Build Coastguard Worker return(-1);
4483*7c568831SAndroid Build Coastguard Worker ctxt->nodeNr = 0;
4484*7c568831SAndroid Build Coastguard Worker ctxt->nodeMax = 10;
4485*7c568831SAndroid Build Coastguard Worker ctxt->node = NULL;
4486*7c568831SAndroid Build Coastguard Worker
4487*7c568831SAndroid Build Coastguard Worker /* Allocate the Name stack */
4488*7c568831SAndroid Build Coastguard Worker ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
4489*7c568831SAndroid Build Coastguard Worker if (ctxt->nameTab == NULL)
4490*7c568831SAndroid Build Coastguard Worker return(-1);
4491*7c568831SAndroid Build Coastguard Worker ctxt->nameNr = 0;
4492*7c568831SAndroid Build Coastguard Worker ctxt->nameMax = 10;
4493*7c568831SAndroid Build Coastguard Worker ctxt->name = NULL;
4494*7c568831SAndroid Build Coastguard Worker
4495*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfoTab = NULL;
4496*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfoNr = 0;
4497*7c568831SAndroid Build Coastguard Worker ctxt->nodeInfoMax = 0;
4498*7c568831SAndroid Build Coastguard Worker
4499*7c568831SAndroid Build Coastguard Worker ctxt->myDoc = NULL;
4500*7c568831SAndroid Build Coastguard Worker ctxt->wellFormed = 1;
4501*7c568831SAndroid Build Coastguard Worker ctxt->replaceEntities = 0;
4502*7c568831SAndroid Build Coastguard Worker ctxt->linenumbers = xmlLineNumbersDefaultValue;
4503*7c568831SAndroid Build Coastguard Worker ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
4504*7c568831SAndroid Build Coastguard Worker ctxt->html = 1;
4505*7c568831SAndroid Build Coastguard Worker ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
4506*7c568831SAndroid Build Coastguard Worker ctxt->vctxt.userData = ctxt;
4507*7c568831SAndroid Build Coastguard Worker ctxt->vctxt.error = xmlParserValidityError;
4508*7c568831SAndroid Build Coastguard Worker ctxt->vctxt.warning = xmlParserValidityWarning;
4509*7c568831SAndroid Build Coastguard Worker ctxt->record_info = 0;
4510*7c568831SAndroid Build Coastguard Worker ctxt->validate = 0;
4511*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
4512*7c568831SAndroid Build Coastguard Worker ctxt->catalogs = NULL;
4513*7c568831SAndroid Build Coastguard Worker xmlInitNodeInfoSeq(&ctxt->node_seq);
4514*7c568831SAndroid Build Coastguard Worker return(0);
4515*7c568831SAndroid Build Coastguard Worker }
4516*7c568831SAndroid Build Coastguard Worker
4517*7c568831SAndroid Build Coastguard Worker /**
4518*7c568831SAndroid Build Coastguard Worker * htmlFreeParserCtxt:
4519*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
4520*7c568831SAndroid Build Coastguard Worker *
4521*7c568831SAndroid Build Coastguard Worker * Free all the memory used by a parser context. However the parsed
4522*7c568831SAndroid Build Coastguard Worker * document in ctxt->myDoc is not freed.
4523*7c568831SAndroid Build Coastguard Worker */
4524*7c568831SAndroid Build Coastguard Worker
4525*7c568831SAndroid Build Coastguard Worker void
htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)4526*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
4527*7c568831SAndroid Build Coastguard Worker {
4528*7c568831SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
4529*7c568831SAndroid Build Coastguard Worker }
4530*7c568831SAndroid Build Coastguard Worker
4531*7c568831SAndroid Build Coastguard Worker /**
4532*7c568831SAndroid Build Coastguard Worker * htmlNewParserCtxt:
4533*7c568831SAndroid Build Coastguard Worker *
4534*7c568831SAndroid Build Coastguard Worker * Allocate and initialize a new HTML parser context.
4535*7c568831SAndroid Build Coastguard Worker *
4536*7c568831SAndroid Build Coastguard Worker * This can be used to parse HTML documents into DOM trees with
4537*7c568831SAndroid Build Coastguard Worker * functions like xmlCtxtReadFile or xmlCtxtReadMemory.
4538*7c568831SAndroid Build Coastguard Worker *
4539*7c568831SAndroid Build Coastguard Worker * See htmlCtxtUseOptions for parser options.
4540*7c568831SAndroid Build Coastguard Worker *
4541*7c568831SAndroid Build Coastguard Worker * See xmlCtxtSetErrorHandler for advanced error handling.
4542*7c568831SAndroid Build Coastguard Worker *
4543*7c568831SAndroid Build Coastguard Worker * See htmlNewSAXParserCtxt for custom SAX parsers.
4544*7c568831SAndroid Build Coastguard Worker *
4545*7c568831SAndroid Build Coastguard Worker * Returns the htmlParserCtxtPtr or NULL in case of allocation error
4546*7c568831SAndroid Build Coastguard Worker */
4547*7c568831SAndroid Build Coastguard Worker
4548*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr
htmlNewParserCtxt(void)4549*7c568831SAndroid Build Coastguard Worker htmlNewParserCtxt(void)
4550*7c568831SAndroid Build Coastguard Worker {
4551*7c568831SAndroid Build Coastguard Worker return(htmlNewSAXParserCtxt(NULL, NULL));
4552*7c568831SAndroid Build Coastguard Worker }
4553*7c568831SAndroid Build Coastguard Worker
4554*7c568831SAndroid Build Coastguard Worker /**
4555*7c568831SAndroid Build Coastguard Worker * htmlNewSAXParserCtxt:
4556*7c568831SAndroid Build Coastguard Worker * @sax: SAX handler
4557*7c568831SAndroid Build Coastguard Worker * @userData: user data
4558*7c568831SAndroid Build Coastguard Worker *
4559*7c568831SAndroid Build Coastguard Worker * Allocate and initialize a new HTML SAX parser context. If userData
4560*7c568831SAndroid Build Coastguard Worker * is NULL, the parser context will be passed as user data.
4561*7c568831SAndroid Build Coastguard Worker *
4562*7c568831SAndroid Build Coastguard Worker * Available since 2.11.0. If you want support older versions,
4563*7c568831SAndroid Build Coastguard Worker * it's best to invoke htmlNewParserCtxt and set ctxt->sax with
4564*7c568831SAndroid Build Coastguard Worker * struct assignment.
4565*7c568831SAndroid Build Coastguard Worker *
4566*7c568831SAndroid Build Coastguard Worker * Also see htmlNewParserCtxt.
4567*7c568831SAndroid Build Coastguard Worker *
4568*7c568831SAndroid Build Coastguard Worker * Returns the htmlParserCtxtPtr or NULL in case of allocation error
4569*7c568831SAndroid Build Coastguard Worker */
4570*7c568831SAndroid Build Coastguard Worker
4571*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr
htmlNewSAXParserCtxt(const htmlSAXHandler * sax,void * userData)4572*7c568831SAndroid Build Coastguard Worker htmlNewSAXParserCtxt(const htmlSAXHandler *sax, void *userData)
4573*7c568831SAndroid Build Coastguard Worker {
4574*7c568831SAndroid Build Coastguard Worker xmlParserCtxtPtr ctxt;
4575*7c568831SAndroid Build Coastguard Worker
4576*7c568831SAndroid Build Coastguard Worker xmlInitParser();
4577*7c568831SAndroid Build Coastguard Worker
4578*7c568831SAndroid Build Coastguard Worker ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
4579*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
4580*7c568831SAndroid Build Coastguard Worker return(NULL);
4581*7c568831SAndroid Build Coastguard Worker memset(ctxt, 0, sizeof(xmlParserCtxt));
4582*7c568831SAndroid Build Coastguard Worker if (htmlInitParserCtxt(ctxt, sax, userData) < 0) {
4583*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(ctxt);
4584*7c568831SAndroid Build Coastguard Worker return(NULL);
4585*7c568831SAndroid Build Coastguard Worker }
4586*7c568831SAndroid Build Coastguard Worker return(ctxt);
4587*7c568831SAndroid Build Coastguard Worker }
4588*7c568831SAndroid Build Coastguard Worker
4589*7c568831SAndroid Build Coastguard Worker static htmlParserCtxtPtr
htmlCreateMemoryParserCtxtInternal(const char * url,const char * buffer,size_t size,const char * encoding)4590*7c568831SAndroid Build Coastguard Worker htmlCreateMemoryParserCtxtInternal(const char *url,
4591*7c568831SAndroid Build Coastguard Worker const char *buffer, size_t size,
4592*7c568831SAndroid Build Coastguard Worker const char *encoding) {
4593*7c568831SAndroid Build Coastguard Worker xmlParserCtxtPtr ctxt;
4594*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
4595*7c568831SAndroid Build Coastguard Worker
4596*7c568831SAndroid Build Coastguard Worker if (buffer == NULL)
4597*7c568831SAndroid Build Coastguard Worker return(NULL);
4598*7c568831SAndroid Build Coastguard Worker
4599*7c568831SAndroid Build Coastguard Worker ctxt = htmlNewParserCtxt();
4600*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
4601*7c568831SAndroid Build Coastguard Worker return(NULL);
4602*7c568831SAndroid Build Coastguard Worker
4603*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding, 0);
4604*7c568831SAndroid Build Coastguard Worker if (input == NULL) {
4605*7c568831SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
4606*7c568831SAndroid Build Coastguard Worker return(NULL);
4607*7c568831SAndroid Build Coastguard Worker }
4608*7c568831SAndroid Build Coastguard Worker
4609*7c568831SAndroid Build Coastguard Worker if (inputPush(ctxt, input) < 0) {
4610*7c568831SAndroid Build Coastguard Worker xmlFreeInputStream(input);
4611*7c568831SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
4612*7c568831SAndroid Build Coastguard Worker return(NULL);
4613*7c568831SAndroid Build Coastguard Worker }
4614*7c568831SAndroid Build Coastguard Worker
4615*7c568831SAndroid Build Coastguard Worker return(ctxt);
4616*7c568831SAndroid Build Coastguard Worker }
4617*7c568831SAndroid Build Coastguard Worker
4618*7c568831SAndroid Build Coastguard Worker /**
4619*7c568831SAndroid Build Coastguard Worker * htmlCreateMemoryParserCtxt:
4620*7c568831SAndroid Build Coastguard Worker * @buffer: a pointer to a char array
4621*7c568831SAndroid Build Coastguard Worker * @size: the size of the array
4622*7c568831SAndroid Build Coastguard Worker *
4623*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Use htmlNewParserCtxt and htmlCtxtReadMemory.
4624*7c568831SAndroid Build Coastguard Worker *
4625*7c568831SAndroid Build Coastguard Worker * Create a parser context for an HTML in-memory document. The input
4626*7c568831SAndroid Build Coastguard Worker * buffer must not contain any terminating null bytes.
4627*7c568831SAndroid Build Coastguard Worker *
4628*7c568831SAndroid Build Coastguard Worker * Returns the new parser context or NULL
4629*7c568831SAndroid Build Coastguard Worker */
4630*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr
htmlCreateMemoryParserCtxt(const char * buffer,int size)4631*7c568831SAndroid Build Coastguard Worker htmlCreateMemoryParserCtxt(const char *buffer, int size) {
4632*7c568831SAndroid Build Coastguard Worker if (size <= 0)
4633*7c568831SAndroid Build Coastguard Worker return(NULL);
4634*7c568831SAndroid Build Coastguard Worker
4635*7c568831SAndroid Build Coastguard Worker return(htmlCreateMemoryParserCtxtInternal(NULL, buffer, size, NULL));
4636*7c568831SAndroid Build Coastguard Worker }
4637*7c568831SAndroid Build Coastguard Worker
4638*7c568831SAndroid Build Coastguard Worker /**
4639*7c568831SAndroid Build Coastguard Worker * htmlCreateDocParserCtxt:
4640*7c568831SAndroid Build Coastguard Worker * @str: a pointer to an array of xmlChar
4641*7c568831SAndroid Build Coastguard Worker * @encoding: encoding (optional)
4642*7c568831SAndroid Build Coastguard Worker *
4643*7c568831SAndroid Build Coastguard Worker * Create a parser context for a null-terminated string.
4644*7c568831SAndroid Build Coastguard Worker *
4645*7c568831SAndroid Build Coastguard Worker * Returns the new parser context or NULL if a memory allocation failed.
4646*7c568831SAndroid Build Coastguard Worker */
4647*7c568831SAndroid Build Coastguard Worker static htmlParserCtxtPtr
htmlCreateDocParserCtxt(const xmlChar * str,const char * url,const char * encoding)4648*7c568831SAndroid Build Coastguard Worker htmlCreateDocParserCtxt(const xmlChar *str, const char *url,
4649*7c568831SAndroid Build Coastguard Worker const char *encoding) {
4650*7c568831SAndroid Build Coastguard Worker xmlParserCtxtPtr ctxt;
4651*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
4652*7c568831SAndroid Build Coastguard Worker
4653*7c568831SAndroid Build Coastguard Worker if (str == NULL)
4654*7c568831SAndroid Build Coastguard Worker return(NULL);
4655*7c568831SAndroid Build Coastguard Worker
4656*7c568831SAndroid Build Coastguard Worker ctxt = htmlNewParserCtxt();
4657*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
4658*7c568831SAndroid Build Coastguard Worker return(NULL);
4659*7c568831SAndroid Build Coastguard Worker
4660*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromString(ctxt, url, (const char *) str,
4661*7c568831SAndroid Build Coastguard Worker encoding, 0);
4662*7c568831SAndroid Build Coastguard Worker if (input == NULL) {
4663*7c568831SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
4664*7c568831SAndroid Build Coastguard Worker return(NULL);
4665*7c568831SAndroid Build Coastguard Worker }
4666*7c568831SAndroid Build Coastguard Worker
4667*7c568831SAndroid Build Coastguard Worker if (inputPush(ctxt, input) < 0) {
4668*7c568831SAndroid Build Coastguard Worker xmlFreeInputStream(input);
4669*7c568831SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
4670*7c568831SAndroid Build Coastguard Worker return(NULL);
4671*7c568831SAndroid Build Coastguard Worker }
4672*7c568831SAndroid Build Coastguard Worker
4673*7c568831SAndroid Build Coastguard Worker return(ctxt);
4674*7c568831SAndroid Build Coastguard Worker }
4675*7c568831SAndroid Build Coastguard Worker
4676*7c568831SAndroid Build Coastguard Worker #ifdef LIBXML_PUSH_ENABLED
4677*7c568831SAndroid Build Coastguard Worker /************************************************************************
4678*7c568831SAndroid Build Coastguard Worker * *
4679*7c568831SAndroid Build Coastguard Worker * Progressive parsing interfaces *
4680*7c568831SAndroid Build Coastguard Worker * *
4681*7c568831SAndroid Build Coastguard Worker ************************************************************************/
4682*7c568831SAndroid Build Coastguard Worker
/*
 * States of the resumable scanner in htmlParseLookupGt. The current
 * state is saved in ctxt->endCheckState between calls. LSTATE_TAG_NAME
 * must remain 0 because that field is reset to 0 when a lookup ends.
 */
enum xmlLookupStates {
    LSTATE_TAG_NAME = 0,
    LSTATE_BEFORE_ATTR_NAME,
    LSTATE_ATTR_NAME,
    LSTATE_AFTER_ATTR_NAME,
    LSTATE_BEFORE_ATTR_VALUE,
    LSTATE_ATTR_VALUE_DQUOTED,
    LSTATE_ATTR_VALUE_SQUOTED,
    LSTATE_ATTR_VALUE_UNQUOTED
};
4693*7c568831SAndroid Build Coastguard Worker
4694*7c568831SAndroid Build Coastguard Worker /**
4695*7c568831SAndroid Build Coastguard Worker * htmlParseLookupGt:
4696*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
4697*7c568831SAndroid Build Coastguard Worker *
4698*7c568831SAndroid Build Coastguard Worker * Check whether there's enough data in the input buffer to finish parsing
4699*7c568831SAndroid Build Coastguard Worker * a tag. This has to take quotes into account.
4700*7c568831SAndroid Build Coastguard Worker */
4701*7c568831SAndroid Build Coastguard Worker static int
htmlParseLookupGt(xmlParserCtxtPtr ctxt)4702*7c568831SAndroid Build Coastguard Worker htmlParseLookupGt(xmlParserCtxtPtr ctxt) {
4703*7c568831SAndroid Build Coastguard Worker const xmlChar *cur;
4704*7c568831SAndroid Build Coastguard Worker const xmlChar *end = ctxt->input->end;
4705*7c568831SAndroid Build Coastguard Worker int state = ctxt->endCheckState;
4706*7c568831SAndroid Build Coastguard Worker size_t index;
4707*7c568831SAndroid Build Coastguard Worker
4708*7c568831SAndroid Build Coastguard Worker if (ctxt->checkIndex == 0)
4709*7c568831SAndroid Build Coastguard Worker cur = ctxt->input->cur + 2; /* Skip '<a' or '</' */
4710*7c568831SAndroid Build Coastguard Worker else
4711*7c568831SAndroid Build Coastguard Worker cur = ctxt->input->cur + ctxt->checkIndex;
4712*7c568831SAndroid Build Coastguard Worker
4713*7c568831SAndroid Build Coastguard Worker while (cur < end) {
4714*7c568831SAndroid Build Coastguard Worker int c = *cur++;
4715*7c568831SAndroid Build Coastguard Worker
4716*7c568831SAndroid Build Coastguard Worker if (state != LSTATE_ATTR_VALUE_SQUOTED &&
4717*7c568831SAndroid Build Coastguard Worker state != LSTATE_ATTR_VALUE_DQUOTED) {
4718*7c568831SAndroid Build Coastguard Worker if (c == '/' &&
4719*7c568831SAndroid Build Coastguard Worker state != LSTATE_BEFORE_ATTR_VALUE &&
4720*7c568831SAndroid Build Coastguard Worker state != LSTATE_ATTR_VALUE_UNQUOTED) {
4721*7c568831SAndroid Build Coastguard Worker state = LSTATE_BEFORE_ATTR_NAME;
4722*7c568831SAndroid Build Coastguard Worker continue;
4723*7c568831SAndroid Build Coastguard Worker } else if (c == '>') {
4724*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
4725*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = 0;
4726*7c568831SAndroid Build Coastguard Worker return(0);
4727*7c568831SAndroid Build Coastguard Worker }
4728*7c568831SAndroid Build Coastguard Worker }
4729*7c568831SAndroid Build Coastguard Worker
4730*7c568831SAndroid Build Coastguard Worker switch (state) {
4731*7c568831SAndroid Build Coastguard Worker case LSTATE_TAG_NAME:
4732*7c568831SAndroid Build Coastguard Worker if (IS_WS_HTML(c))
4733*7c568831SAndroid Build Coastguard Worker state = LSTATE_BEFORE_ATTR_NAME;
4734*7c568831SAndroid Build Coastguard Worker break;
4735*7c568831SAndroid Build Coastguard Worker
4736*7c568831SAndroid Build Coastguard Worker case LSTATE_BEFORE_ATTR_NAME:
4737*7c568831SAndroid Build Coastguard Worker if (!IS_WS_HTML(c))
4738*7c568831SAndroid Build Coastguard Worker state = LSTATE_ATTR_NAME;
4739*7c568831SAndroid Build Coastguard Worker break;
4740*7c568831SAndroid Build Coastguard Worker
4741*7c568831SAndroid Build Coastguard Worker case LSTATE_ATTR_NAME:
4742*7c568831SAndroid Build Coastguard Worker if (c == '=')
4743*7c568831SAndroid Build Coastguard Worker state = LSTATE_BEFORE_ATTR_VALUE;
4744*7c568831SAndroid Build Coastguard Worker else if (IS_WS_HTML(c))
4745*7c568831SAndroid Build Coastguard Worker state = LSTATE_AFTER_ATTR_NAME;
4746*7c568831SAndroid Build Coastguard Worker break;
4747*7c568831SAndroid Build Coastguard Worker
4748*7c568831SAndroid Build Coastguard Worker case LSTATE_AFTER_ATTR_NAME:
4749*7c568831SAndroid Build Coastguard Worker if (c == '=')
4750*7c568831SAndroid Build Coastguard Worker state = LSTATE_BEFORE_ATTR_VALUE;
4751*7c568831SAndroid Build Coastguard Worker else if (!IS_WS_HTML(c))
4752*7c568831SAndroid Build Coastguard Worker state = LSTATE_ATTR_NAME;
4753*7c568831SAndroid Build Coastguard Worker break;
4754*7c568831SAndroid Build Coastguard Worker
4755*7c568831SAndroid Build Coastguard Worker case LSTATE_BEFORE_ATTR_VALUE:
4756*7c568831SAndroid Build Coastguard Worker if (c == '"')
4757*7c568831SAndroid Build Coastguard Worker state = LSTATE_ATTR_VALUE_DQUOTED;
4758*7c568831SAndroid Build Coastguard Worker else if (c == '\'')
4759*7c568831SAndroid Build Coastguard Worker state = LSTATE_ATTR_VALUE_SQUOTED;
4760*7c568831SAndroid Build Coastguard Worker else if (!IS_WS_HTML(c))
4761*7c568831SAndroid Build Coastguard Worker state = LSTATE_ATTR_VALUE_UNQUOTED;
4762*7c568831SAndroid Build Coastguard Worker break;
4763*7c568831SAndroid Build Coastguard Worker
4764*7c568831SAndroid Build Coastguard Worker case LSTATE_ATTR_VALUE_DQUOTED:
4765*7c568831SAndroid Build Coastguard Worker if (c == '"')
4766*7c568831SAndroid Build Coastguard Worker state = LSTATE_BEFORE_ATTR_NAME;
4767*7c568831SAndroid Build Coastguard Worker break;
4768*7c568831SAndroid Build Coastguard Worker
4769*7c568831SAndroid Build Coastguard Worker case LSTATE_ATTR_VALUE_SQUOTED:
4770*7c568831SAndroid Build Coastguard Worker if (c == '\'')
4771*7c568831SAndroid Build Coastguard Worker state = LSTATE_BEFORE_ATTR_NAME;
4772*7c568831SAndroid Build Coastguard Worker break;
4773*7c568831SAndroid Build Coastguard Worker
4774*7c568831SAndroid Build Coastguard Worker case LSTATE_ATTR_VALUE_UNQUOTED:
4775*7c568831SAndroid Build Coastguard Worker if (IS_WS_HTML(c))
4776*7c568831SAndroid Build Coastguard Worker state = LSTATE_BEFORE_ATTR_NAME;
4777*7c568831SAndroid Build Coastguard Worker break;
4778*7c568831SAndroid Build Coastguard Worker }
4779*7c568831SAndroid Build Coastguard Worker }
4780*7c568831SAndroid Build Coastguard Worker
4781*7c568831SAndroid Build Coastguard Worker index = cur - ctxt->input->cur;
4782*7c568831SAndroid Build Coastguard Worker if (index > LONG_MAX) {
4783*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
4784*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = 0;
4785*7c568831SAndroid Build Coastguard Worker return(0);
4786*7c568831SAndroid Build Coastguard Worker }
4787*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = index;
4788*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = state;
4789*7c568831SAndroid Build Coastguard Worker return(-1);
4790*7c568831SAndroid Build Coastguard Worker }
4791*7c568831SAndroid Build Coastguard Worker
4792*7c568831SAndroid Build Coastguard Worker /**
4793*7c568831SAndroid Build Coastguard Worker * htmlParseLookupString:
4794*7c568831SAndroid Build Coastguard Worker * @ctxt: an XML parser context
4795*7c568831SAndroid Build Coastguard Worker * @startDelta: delta to apply at the start
4796*7c568831SAndroid Build Coastguard Worker * @str: string
4797*7c568831SAndroid Build Coastguard Worker * @strLen: length of string
4798*7c568831SAndroid Build Coastguard Worker *
4799*7c568831SAndroid Build Coastguard Worker * Check whether the input buffer contains a string.
4800*7c568831SAndroid Build Coastguard Worker */
4801*7c568831SAndroid Build Coastguard Worker static int
htmlParseLookupString(xmlParserCtxtPtr ctxt,size_t startDelta,const char * str,size_t strLen,size_t extraLen)4802*7c568831SAndroid Build Coastguard Worker htmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
4803*7c568831SAndroid Build Coastguard Worker const char *str, size_t strLen, size_t extraLen) {
4804*7c568831SAndroid Build Coastguard Worker const xmlChar *end = ctxt->input->end;
4805*7c568831SAndroid Build Coastguard Worker const xmlChar *cur, *term;
4806*7c568831SAndroid Build Coastguard Worker size_t index, rescan;
4807*7c568831SAndroid Build Coastguard Worker int ret;
4808*7c568831SAndroid Build Coastguard Worker
4809*7c568831SAndroid Build Coastguard Worker if (ctxt->checkIndex == 0) {
4810*7c568831SAndroid Build Coastguard Worker cur = ctxt->input->cur + startDelta;
4811*7c568831SAndroid Build Coastguard Worker } else {
4812*7c568831SAndroid Build Coastguard Worker cur = ctxt->input->cur + ctxt->checkIndex;
4813*7c568831SAndroid Build Coastguard Worker }
4814*7c568831SAndroid Build Coastguard Worker
4815*7c568831SAndroid Build Coastguard Worker term = BAD_CAST strstr((const char *) cur, str);
4816*7c568831SAndroid Build Coastguard Worker if ((term != NULL) &&
4817*7c568831SAndroid Build Coastguard Worker ((size_t) (ctxt->input->end - term) >= extraLen + 1)) {
4818*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
4819*7c568831SAndroid Build Coastguard Worker
4820*7c568831SAndroid Build Coastguard Worker if (term - ctxt->input->cur > INT_MAX / 2)
4821*7c568831SAndroid Build Coastguard Worker ret = INT_MAX / 2;
4822*7c568831SAndroid Build Coastguard Worker else
4823*7c568831SAndroid Build Coastguard Worker ret = term - ctxt->input->cur;
4824*7c568831SAndroid Build Coastguard Worker
4825*7c568831SAndroid Build Coastguard Worker return(ret);
4826*7c568831SAndroid Build Coastguard Worker }
4827*7c568831SAndroid Build Coastguard Worker
4828*7c568831SAndroid Build Coastguard Worker /* Rescan (strLen + extraLen - 1) characters. */
4829*7c568831SAndroid Build Coastguard Worker rescan = strLen + extraLen - 1;
4830*7c568831SAndroid Build Coastguard Worker if ((size_t) (end - cur) <= rescan)
4831*7c568831SAndroid Build Coastguard Worker end = cur;
4832*7c568831SAndroid Build Coastguard Worker else
4833*7c568831SAndroid Build Coastguard Worker end -= rescan;
4834*7c568831SAndroid Build Coastguard Worker index = end - ctxt->input->cur;
4835*7c568831SAndroid Build Coastguard Worker if (index > INT_MAX / 2) {
4836*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
4837*7c568831SAndroid Build Coastguard Worker ret = INT_MAX / 2;
4838*7c568831SAndroid Build Coastguard Worker } else {
4839*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = index;
4840*7c568831SAndroid Build Coastguard Worker ret = -1;
4841*7c568831SAndroid Build Coastguard Worker }
4842*7c568831SAndroid Build Coastguard Worker
4843*7c568831SAndroid Build Coastguard Worker return(ret);
4844*7c568831SAndroid Build Coastguard Worker }
4845*7c568831SAndroid Build Coastguard Worker
4846*7c568831SAndroid Build Coastguard Worker /**
4847*7c568831SAndroid Build Coastguard Worker * htmlParseLookupCommentEnd:
4848*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
4849*7c568831SAndroid Build Coastguard Worker *
4850*7c568831SAndroid Build Coastguard Worker * Try to find a comment end tag in the input stream
4851*7c568831SAndroid Build Coastguard Worker * The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags.
4852*7c568831SAndroid Build Coastguard Worker * (See https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment)
4853*7c568831SAndroid Build Coastguard Worker * This function has a side effect of (possibly) incrementing ctxt->checkIndex
4854*7c568831SAndroid Build Coastguard Worker * to avoid rescanning sequences of bytes, it DOES change the state of the
4855*7c568831SAndroid Build Coastguard Worker * parser, do not use liberally.
4856*7c568831SAndroid Build Coastguard Worker *
4857*7c568831SAndroid Build Coastguard Worker * Returns the index to the current parsing point if the full sequence is available, -1 otherwise.
4858*7c568831SAndroid Build Coastguard Worker */
4859*7c568831SAndroid Build Coastguard Worker static int
htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)4860*7c568831SAndroid Build Coastguard Worker htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
4861*7c568831SAndroid Build Coastguard Worker {
4862*7c568831SAndroid Build Coastguard Worker int mark = 0;
4863*7c568831SAndroid Build Coastguard Worker int offset;
4864*7c568831SAndroid Build Coastguard Worker
4865*7c568831SAndroid Build Coastguard Worker while (1) {
4866*7c568831SAndroid Build Coastguard Worker mark = htmlParseLookupString(ctxt, 2, "--", 2, 0);
4867*7c568831SAndroid Build Coastguard Worker if (mark < 0)
4868*7c568831SAndroid Build Coastguard Worker break;
4869*7c568831SAndroid Build Coastguard Worker if ((NXT(mark+2) == '>') ||
4870*7c568831SAndroid Build Coastguard Worker ((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) {
4871*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
4872*7c568831SAndroid Build Coastguard Worker break;
4873*7c568831SAndroid Build Coastguard Worker }
4874*7c568831SAndroid Build Coastguard Worker offset = (NXT(mark+2) == '!') ? 3 : 2;
4875*7c568831SAndroid Build Coastguard Worker if (mark + offset >= ctxt->input->end - ctxt->input->cur) {
4876*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = mark;
4877*7c568831SAndroid Build Coastguard Worker return(-1);
4878*7c568831SAndroid Build Coastguard Worker }
4879*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = mark + 1;
4880*7c568831SAndroid Build Coastguard Worker }
4881*7c568831SAndroid Build Coastguard Worker return mark;
4882*7c568831SAndroid Build Coastguard Worker }
4883*7c568831SAndroid Build Coastguard Worker
4884*7c568831SAndroid Build Coastguard Worker
4885*7c568831SAndroid Build Coastguard Worker /**
4886*7c568831SAndroid Build Coastguard Worker * htmlParseTryOrFinish:
4887*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
4888*7c568831SAndroid Build Coastguard Worker * @terminate: last chunk indicator
4889*7c568831SAndroid Build Coastguard Worker *
4890*7c568831SAndroid Build Coastguard Worker * Try to progress on parsing
4891*7c568831SAndroid Build Coastguard Worker *
4892*7c568831SAndroid Build Coastguard Worker * Returns zero if no parsing was possible
4893*7c568831SAndroid Build Coastguard Worker */
4894*7c568831SAndroid Build Coastguard Worker static int
htmlParseTryOrFinish(htmlParserCtxtPtr ctxt,int terminate)4895*7c568831SAndroid Build Coastguard Worker htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
4896*7c568831SAndroid Build Coastguard Worker int ret = 0;
4897*7c568831SAndroid Build Coastguard Worker htmlParserInputPtr in;
4898*7c568831SAndroid Build Coastguard Worker ptrdiff_t avail = 0;
4899*7c568831SAndroid Build Coastguard Worker int cur;
4900*7c568831SAndroid Build Coastguard Worker
4901*7c568831SAndroid Build Coastguard Worker htmlParserNodeInfo node_info;
4902*7c568831SAndroid Build Coastguard Worker
4903*7c568831SAndroid Build Coastguard Worker while (PARSER_STOPPED(ctxt) == 0) {
4904*7c568831SAndroid Build Coastguard Worker
4905*7c568831SAndroid Build Coastguard Worker in = ctxt->input;
4906*7c568831SAndroid Build Coastguard Worker if (in == NULL) break;
4907*7c568831SAndroid Build Coastguard Worker avail = in->end - in->cur;
4908*7c568831SAndroid Build Coastguard Worker if ((avail == 0) && (terminate)) {
4909*7c568831SAndroid Build Coastguard Worker htmlAutoCloseOnEnd(ctxt);
4910*7c568831SAndroid Build Coastguard Worker if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
4911*7c568831SAndroid Build Coastguard Worker /*
4912*7c568831SAndroid Build Coastguard Worker * SAX: end of the document processing.
4913*7c568831SAndroid Build Coastguard Worker */
4914*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_EOF;
4915*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4916*7c568831SAndroid Build Coastguard Worker ctxt->sax->endDocument(ctxt->userData);
4917*7c568831SAndroid Build Coastguard Worker }
4918*7c568831SAndroid Build Coastguard Worker }
4919*7c568831SAndroid Build Coastguard Worker if (avail < 1)
4920*7c568831SAndroid Build Coastguard Worker goto done;
4921*7c568831SAndroid Build Coastguard Worker cur = in->cur[0];
4922*7c568831SAndroid Build Coastguard Worker
4923*7c568831SAndroid Build Coastguard Worker switch (ctxt->instate) {
4924*7c568831SAndroid Build Coastguard Worker case XML_PARSER_EOF:
4925*7c568831SAndroid Build Coastguard Worker /*
4926*7c568831SAndroid Build Coastguard Worker * Document parsing is done !
4927*7c568831SAndroid Build Coastguard Worker */
4928*7c568831SAndroid Build Coastguard Worker goto done;
4929*7c568831SAndroid Build Coastguard Worker case XML_PARSER_START:
4930*7c568831SAndroid Build Coastguard Worker /*
4931*7c568831SAndroid Build Coastguard Worker * This is wrong but matches long-standing behavior. In most
4932*7c568831SAndroid Build Coastguard Worker * cases, a document starting with an XML declaration will
4933*7c568831SAndroid Build Coastguard Worker * specify UTF-8.
4934*7c568831SAndroid Build Coastguard Worker */
4935*7c568831SAndroid Build Coastguard Worker if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
4936*7c568831SAndroid Build Coastguard Worker (xmlStrncmp(ctxt->input->cur, BAD_CAST "<?xm", 4) == 0)) {
4937*7c568831SAndroid Build Coastguard Worker xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF8);
4938*7c568831SAndroid Build Coastguard Worker }
4939*7c568831SAndroid Build Coastguard Worker
4940*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
4941*7c568831SAndroid Build Coastguard Worker ctxt->sax->setDocumentLocator(ctxt->userData,
4942*7c568831SAndroid Build Coastguard Worker (xmlSAXLocator *) &xmlDefaultSAXLocator);
4943*7c568831SAndroid Build Coastguard Worker }
4944*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax) && (ctxt->sax->startDocument) &&
4945*7c568831SAndroid Build Coastguard Worker (!ctxt->disableSAX))
4946*7c568831SAndroid Build Coastguard Worker ctxt->sax->startDocument(ctxt->userData);
4947*7c568831SAndroid Build Coastguard Worker
4948*7c568831SAndroid Build Coastguard Worker /* Allow callback to modify state */
4949*7c568831SAndroid Build Coastguard Worker if (ctxt->instate == XML_PARSER_START)
4950*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_MISC;
4951*7c568831SAndroid Build Coastguard Worker break;
4952*7c568831SAndroid Build Coastguard Worker case XML_PARSER_START_TAG: {
4953*7c568831SAndroid Build Coastguard Worker const xmlChar *name;
4954*7c568831SAndroid Build Coastguard Worker int next;
4955*7c568831SAndroid Build Coastguard Worker const htmlElemDesc * info;
4956*7c568831SAndroid Build Coastguard Worker
4957*7c568831SAndroid Build Coastguard Worker /*
4958*7c568831SAndroid Build Coastguard Worker * not enough chars in buffer
4959*7c568831SAndroid Build Coastguard Worker */
4960*7c568831SAndroid Build Coastguard Worker if (avail < 2)
4961*7c568831SAndroid Build Coastguard Worker goto done;
4962*7c568831SAndroid Build Coastguard Worker cur = in->cur[0];
4963*7c568831SAndroid Build Coastguard Worker next = in->cur[1];
4964*7c568831SAndroid Build Coastguard Worker if (cur != '<') {
4965*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_CONTENT;
4966*7c568831SAndroid Build Coastguard Worker break;
4967*7c568831SAndroid Build Coastguard Worker }
4968*7c568831SAndroid Build Coastguard Worker if (next == '/') {
4969*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_END_TAG;
4970*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
4971*7c568831SAndroid Build Coastguard Worker break;
4972*7c568831SAndroid Build Coastguard Worker }
4973*7c568831SAndroid Build Coastguard Worker if ((!terminate) &&
4974*7c568831SAndroid Build Coastguard Worker (htmlParseLookupGt(ctxt) < 0))
4975*7c568831SAndroid Build Coastguard Worker goto done;
4976*7c568831SAndroid Build Coastguard Worker
4977*7c568831SAndroid Build Coastguard Worker /* Capture start position */
4978*7c568831SAndroid Build Coastguard Worker if (ctxt->record_info) {
4979*7c568831SAndroid Build Coastguard Worker node_info.begin_pos = ctxt->input->consumed +
4980*7c568831SAndroid Build Coastguard Worker (CUR_PTR - ctxt->input->base);
4981*7c568831SAndroid Build Coastguard Worker node_info.begin_line = ctxt->input->line;
4982*7c568831SAndroid Build Coastguard Worker }
4983*7c568831SAndroid Build Coastguard Worker
4984*7c568831SAndroid Build Coastguard Worker
4985*7c568831SAndroid Build Coastguard Worker htmlParseStartTag(ctxt);
4986*7c568831SAndroid Build Coastguard Worker name = ctxt->name;
4987*7c568831SAndroid Build Coastguard Worker if (name == NULL)
4988*7c568831SAndroid Build Coastguard Worker break;
4989*7c568831SAndroid Build Coastguard Worker
4990*7c568831SAndroid Build Coastguard Worker /*
4991*7c568831SAndroid Build Coastguard Worker * Check for an Empty Element labeled the XML/SGML way
4992*7c568831SAndroid Build Coastguard Worker */
4993*7c568831SAndroid Build Coastguard Worker if ((CUR == '/') && (NXT(1) == '>')) {
4994*7c568831SAndroid Build Coastguard Worker SKIP(2);
4995*7c568831SAndroid Build Coastguard Worker htmlParserFinishElementParsing(ctxt);
4996*7c568831SAndroid Build Coastguard Worker if ((ctxt->options & HTML_PARSE_HTML5) == 0) {
4997*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) &&
4998*7c568831SAndroid Build Coastguard Worker (ctxt->sax->endElement != NULL))
4999*7c568831SAndroid Build Coastguard Worker ctxt->sax->endElement(ctxt->userData, name);
5000*7c568831SAndroid Build Coastguard Worker }
5001*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
5002*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_CONTENT;
5003*7c568831SAndroid Build Coastguard Worker break;
5004*7c568831SAndroid Build Coastguard Worker }
5005*7c568831SAndroid Build Coastguard Worker
5006*7c568831SAndroid Build Coastguard Worker if (CUR != '>')
5007*7c568831SAndroid Build Coastguard Worker break;
5008*7c568831SAndroid Build Coastguard Worker SKIP(1);
5009*7c568831SAndroid Build Coastguard Worker
5010*7c568831SAndroid Build Coastguard Worker /*
5011*7c568831SAndroid Build Coastguard Worker * Lookup the info for that element.
5012*7c568831SAndroid Build Coastguard Worker */
5013*7c568831SAndroid Build Coastguard Worker info = htmlTagLookup(name);
5014*7c568831SAndroid Build Coastguard Worker
5015*7c568831SAndroid Build Coastguard Worker /*
5016*7c568831SAndroid Build Coastguard Worker * Check for an Empty Element from DTD definition
5017*7c568831SAndroid Build Coastguard Worker */
5018*7c568831SAndroid Build Coastguard Worker if ((info != NULL) && (info->empty)) {
5019*7c568831SAndroid Build Coastguard Worker htmlParserFinishElementParsing(ctxt);
5020*7c568831SAndroid Build Coastguard Worker if ((ctxt->options & HTML_PARSE_HTML5) == 0) {
5021*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) &&
5022*7c568831SAndroid Build Coastguard Worker (ctxt->sax->endElement != NULL))
5023*7c568831SAndroid Build Coastguard Worker ctxt->sax->endElement(ctxt->userData, name);
5024*7c568831SAndroid Build Coastguard Worker }
5025*7c568831SAndroid Build Coastguard Worker htmlnamePop(ctxt);
5026*7c568831SAndroid Build Coastguard Worker }
5027*7c568831SAndroid Build Coastguard Worker
5028*7c568831SAndroid Build Coastguard Worker if (info != NULL)
5029*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = info->dataMode;
5030*7c568831SAndroid Build Coastguard Worker
5031*7c568831SAndroid Build Coastguard Worker if (ctxt->record_info)
5032*7c568831SAndroid Build Coastguard Worker htmlNodeInfoPush(ctxt, &node_info);
5033*7c568831SAndroid Build Coastguard Worker
5034*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_CONTENT;
5035*7c568831SAndroid Build Coastguard Worker break;
5036*7c568831SAndroid Build Coastguard Worker }
5037*7c568831SAndroid Build Coastguard Worker case XML_PARSER_MISC:
5038*7c568831SAndroid Build Coastguard Worker case XML_PARSER_PROLOG:
5039*7c568831SAndroid Build Coastguard Worker case XML_PARSER_CONTENT:
5040*7c568831SAndroid Build Coastguard Worker case XML_PARSER_EPILOG: {
5041*7c568831SAndroid Build Coastguard Worker int mode;
5042*7c568831SAndroid Build Coastguard Worker
5043*7c568831SAndroid Build Coastguard Worker if ((ctxt->instate == XML_PARSER_MISC) ||
5044*7c568831SAndroid Build Coastguard Worker (ctxt->instate == XML_PARSER_PROLOG)) {
5045*7c568831SAndroid Build Coastguard Worker SKIP_BLANKS;
5046*7c568831SAndroid Build Coastguard Worker avail = in->end - in->cur;
5047*7c568831SAndroid Build Coastguard Worker }
5048*7c568831SAndroid Build Coastguard Worker
5049*7c568831SAndroid Build Coastguard Worker if (avail < 1)
5050*7c568831SAndroid Build Coastguard Worker goto done;
5051*7c568831SAndroid Build Coastguard Worker cur = in->cur[0];
5052*7c568831SAndroid Build Coastguard Worker mode = ctxt->endCheckState;
5053*7c568831SAndroid Build Coastguard Worker
5054*7c568831SAndroid Build Coastguard Worker if (mode != 0) {
5055*7c568831SAndroid Build Coastguard Worker while ((PARSER_STOPPED(ctxt) == 0) &&
5056*7c568831SAndroid Build Coastguard Worker (in->cur < in->end)) {
5057*7c568831SAndroid Build Coastguard Worker size_t extra;
5058*7c568831SAndroid Build Coastguard Worker
5059*7c568831SAndroid Build Coastguard Worker extra = strlen((const char *) ctxt->name) + 2;
5060*7c568831SAndroid Build Coastguard Worker
5061*7c568831SAndroid Build Coastguard Worker if ((!terminate) &&
5062*7c568831SAndroid Build Coastguard Worker (htmlParseLookupString(ctxt, 0, "<", 1,
5063*7c568831SAndroid Build Coastguard Worker extra) < 0))
5064*7c568831SAndroid Build Coastguard Worker goto done;
5065*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
5066*7c568831SAndroid Build Coastguard Worker
5067*7c568831SAndroid Build Coastguard Worker if (htmlParseCharData(ctxt))
5068*7c568831SAndroid Build Coastguard Worker break;
5069*7c568831SAndroid Build Coastguard Worker }
5070*7c568831SAndroid Build Coastguard Worker
5071*7c568831SAndroid Build Coastguard Worker break;
5072*7c568831SAndroid Build Coastguard Worker } else if (cur == '<') {
5073*7c568831SAndroid Build Coastguard Worker int next;
5074*7c568831SAndroid Build Coastguard Worker
5075*7c568831SAndroid Build Coastguard Worker if (avail < 2) {
5076*7c568831SAndroid Build Coastguard Worker if (!terminate)
5077*7c568831SAndroid Build Coastguard Worker goto done;
5078*7c568831SAndroid Build Coastguard Worker next = ' ';
5079*7c568831SAndroid Build Coastguard Worker } else {
5080*7c568831SAndroid Build Coastguard Worker next = in->cur[1];
5081*7c568831SAndroid Build Coastguard Worker }
5082*7c568831SAndroid Build Coastguard Worker
5083*7c568831SAndroid Build Coastguard Worker if (next == '!') {
5084*7c568831SAndroid Build Coastguard Worker if ((!terminate) && (avail < 4))
5085*7c568831SAndroid Build Coastguard Worker goto done;
5086*7c568831SAndroid Build Coastguard Worker if ((in->cur[2] == '-') && (in->cur[3] == '-')) {
5087*7c568831SAndroid Build Coastguard Worker if ((!terminate) &&
5088*7c568831SAndroid Build Coastguard Worker (htmlParseLookupCommentEnd(ctxt) < 0))
5089*7c568831SAndroid Build Coastguard Worker goto done;
5090*7c568831SAndroid Build Coastguard Worker SKIP(4);
5091*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 0);
5092*7c568831SAndroid Build Coastguard Worker break;
5093*7c568831SAndroid Build Coastguard Worker }
5094*7c568831SAndroid Build Coastguard Worker
5095*7c568831SAndroid Build Coastguard Worker if ((!terminate) && (avail < 9))
5096*7c568831SAndroid Build Coastguard Worker goto done;
5097*7c568831SAndroid Build Coastguard Worker if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
5098*7c568831SAndroid Build Coastguard Worker (UPP(4) == 'C') && (UPP(5) == 'T') &&
5099*7c568831SAndroid Build Coastguard Worker (UPP(6) == 'Y') && (UPP(7) == 'P') &&
5100*7c568831SAndroid Build Coastguard Worker (UPP(8) == 'E')) {
5101*7c568831SAndroid Build Coastguard Worker if ((!terminate) &&
5102*7c568831SAndroid Build Coastguard Worker (htmlParseLookupString(ctxt, 9, ">", 1,
5103*7c568831SAndroid Build Coastguard Worker 0) < 0))
5104*7c568831SAndroid Build Coastguard Worker goto done;
5105*7c568831SAndroid Build Coastguard Worker htmlParseDocTypeDecl(ctxt);
5106*7c568831SAndroid Build Coastguard Worker if (ctxt->instate == XML_PARSER_MISC)
5107*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_PROLOG;
5108*7c568831SAndroid Build Coastguard Worker } else {
5109*7c568831SAndroid Build Coastguard Worker if ((!terminate) &&
5110*7c568831SAndroid Build Coastguard Worker (htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
5111*7c568831SAndroid Build Coastguard Worker goto done;
5112*7c568831SAndroid Build Coastguard Worker SKIP(2);
5113*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 1);
5114*7c568831SAndroid Build Coastguard Worker }
5115*7c568831SAndroid Build Coastguard Worker } else if (next == '?') {
5116*7c568831SAndroid Build Coastguard Worker if ((!terminate) &&
5117*7c568831SAndroid Build Coastguard Worker (htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
5118*7c568831SAndroid Build Coastguard Worker goto done;
5119*7c568831SAndroid Build Coastguard Worker SKIP(1);
5120*7c568831SAndroid Build Coastguard Worker htmlParseComment(ctxt, /* bogus */ 1);
5121*7c568831SAndroid Build Coastguard Worker } else if (next == '/') {
5122*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_END_TAG;
5123*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
5124*7c568831SAndroid Build Coastguard Worker break;
5125*7c568831SAndroid Build Coastguard Worker } else if (IS_ASCII_LETTER(next)) {
5126*7c568831SAndroid Build Coastguard Worker if ((!terminate) && (next == 0))
5127*7c568831SAndroid Build Coastguard Worker goto done;
5128*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_START_TAG;
5129*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
5130*7c568831SAndroid Build Coastguard Worker break;
5131*7c568831SAndroid Build Coastguard Worker } else {
5132*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_CONTENT;
5133*7c568831SAndroid Build Coastguard Worker htmlCheckParagraph(ctxt);
5134*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5135*7c568831SAndroid Build Coastguard Worker (ctxt->sax->characters != NULL))
5136*7c568831SAndroid Build Coastguard Worker ctxt->sax->characters(ctxt->userData,
5137*7c568831SAndroid Build Coastguard Worker BAD_CAST "<", 1);
5138*7c568831SAndroid Build Coastguard Worker SKIP(1);
5139*7c568831SAndroid Build Coastguard Worker }
5140*7c568831SAndroid Build Coastguard Worker } else {
5141*7c568831SAndroid Build Coastguard Worker /*
5142*7c568831SAndroid Build Coastguard Worker * check that the text sequence is complete
5143*7c568831SAndroid Build Coastguard Worker * before handing out the data to the parser
5144*7c568831SAndroid Build Coastguard Worker * to avoid problems with erroneous end of
5145*7c568831SAndroid Build Coastguard Worker * data detection.
5146*7c568831SAndroid Build Coastguard Worker */
5147*7c568831SAndroid Build Coastguard Worker if ((!terminate) &&
5148*7c568831SAndroid Build Coastguard Worker (htmlParseLookupString(ctxt, 0, "<", 1, 0) < 0))
5149*7c568831SAndroid Build Coastguard Worker goto done;
5150*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
5151*7c568831SAndroid Build Coastguard Worker htmlParseCharData(ctxt);
5152*7c568831SAndroid Build Coastguard Worker }
5153*7c568831SAndroid Build Coastguard Worker
5154*7c568831SAndroid Build Coastguard Worker break;
5155*7c568831SAndroid Build Coastguard Worker }
5156*7c568831SAndroid Build Coastguard Worker case XML_PARSER_END_TAG:
5157*7c568831SAndroid Build Coastguard Worker if ((terminate) && (avail == 2)) {
5158*7c568831SAndroid Build Coastguard Worker htmlCheckParagraph(ctxt);
5159*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5160*7c568831SAndroid Build Coastguard Worker (ctxt->sax->characters != NULL))
5161*7c568831SAndroid Build Coastguard Worker ctxt->sax->characters(ctxt->userData,
5162*7c568831SAndroid Build Coastguard Worker BAD_CAST "</", 2);
5163*7c568831SAndroid Build Coastguard Worker goto done;
5164*7c568831SAndroid Build Coastguard Worker }
5165*7c568831SAndroid Build Coastguard Worker if ((!terminate) &&
5166*7c568831SAndroid Build Coastguard Worker (htmlParseLookupGt(ctxt) < 0))
5167*7c568831SAndroid Build Coastguard Worker goto done;
5168*7c568831SAndroid Build Coastguard Worker htmlParseEndTag(ctxt);
5169*7c568831SAndroid Build Coastguard Worker if (ctxt->nameNr == 0) {
5170*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_EPILOG;
5171*7c568831SAndroid Build Coastguard Worker } else {
5172*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_CONTENT;
5173*7c568831SAndroid Build Coastguard Worker }
5174*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
5175*7c568831SAndroid Build Coastguard Worker break;
5176*7c568831SAndroid Build Coastguard Worker default:
5177*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5178*7c568831SAndroid Build Coastguard Worker "HPP: internal error\n", NULL, NULL);
5179*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_EOF;
5180*7c568831SAndroid Build Coastguard Worker break;
5181*7c568831SAndroid Build Coastguard Worker }
5182*7c568831SAndroid Build Coastguard Worker }
5183*7c568831SAndroid Build Coastguard Worker done:
5184*7c568831SAndroid Build Coastguard Worker if ((avail == 0) && (terminate)) {
5185*7c568831SAndroid Build Coastguard Worker htmlAutoCloseOnEnd(ctxt);
5186*7c568831SAndroid Build Coastguard Worker if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5187*7c568831SAndroid Build Coastguard Worker /*
5188*7c568831SAndroid Build Coastguard Worker * SAX: end of the document processing.
5189*7c568831SAndroid Build Coastguard Worker */
5190*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_EOF;
5191*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5192*7c568831SAndroid Build Coastguard Worker ctxt->sax->endDocument(ctxt->userData);
5193*7c568831SAndroid Build Coastguard Worker }
5194*7c568831SAndroid Build Coastguard Worker }
5195*7c568831SAndroid Build Coastguard Worker if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) &&
5196*7c568831SAndroid Build Coastguard Worker ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
5197*7c568831SAndroid Build Coastguard Worker (ctxt->instate == XML_PARSER_EPILOG))) {
5198*7c568831SAndroid Build Coastguard Worker xmlDtdPtr dtd;
5199*7c568831SAndroid Build Coastguard Worker dtd = xmlGetIntSubset(ctxt->myDoc);
5200*7c568831SAndroid Build Coastguard Worker if (dtd == NULL) {
5201*7c568831SAndroid Build Coastguard Worker ctxt->myDoc->intSubset =
5202*7c568831SAndroid Build Coastguard Worker xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
5203*7c568831SAndroid Build Coastguard Worker BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
5204*7c568831SAndroid Build Coastguard Worker BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
5205*7c568831SAndroid Build Coastguard Worker if (ctxt->myDoc->intSubset == NULL)
5206*7c568831SAndroid Build Coastguard Worker htmlErrMemory(ctxt);
5207*7c568831SAndroid Build Coastguard Worker }
5208*7c568831SAndroid Build Coastguard Worker }
5209*7c568831SAndroid Build Coastguard Worker return(ret);
5210*7c568831SAndroid Build Coastguard Worker }
5211*7c568831SAndroid Build Coastguard Worker
5212*7c568831SAndroid Build Coastguard Worker /**
5213*7c568831SAndroid Build Coastguard Worker * htmlParseChunk:
5214*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
5215*7c568831SAndroid Build Coastguard Worker * @chunk: chunk of memory
5216*7c568831SAndroid Build Coastguard Worker * @size: size of chunk in bytes
5217*7c568831SAndroid Build Coastguard Worker * @terminate: last chunk indicator
5218*7c568831SAndroid Build Coastguard Worker *
5219*7c568831SAndroid Build Coastguard Worker * Parse a chunk of memory in push parser mode.
5220*7c568831SAndroid Build Coastguard Worker *
5221*7c568831SAndroid Build Coastguard Worker * Assumes that the parser context was initialized with
5222*7c568831SAndroid Build Coastguard Worker * htmlCreatePushParserCtxt.
5223*7c568831SAndroid Build Coastguard Worker *
5224*7c568831SAndroid Build Coastguard Worker * The last chunk, which will often be empty, must be marked with
5225*7c568831SAndroid Build Coastguard Worker * the @terminate flag. With the default SAX callbacks, the resulting
5226*7c568831SAndroid Build Coastguard Worker * document will be available in ctxt->myDoc. This pointer will not
5227*7c568831SAndroid Build Coastguard Worker * be freed by the library.
5228*7c568831SAndroid Build Coastguard Worker *
5229*7c568831SAndroid Build Coastguard Worker * If the document isn't well-formed, ctxt->myDoc is set to NULL.
5230*7c568831SAndroid Build Coastguard Worker *
5231*7c568831SAndroid Build Coastguard Worker * Returns an xmlParserErrors code (0 on success).
5232*7c568831SAndroid Build Coastguard Worker */
5233*7c568831SAndroid Build Coastguard Worker int
htmlParseChunk(htmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)5234*7c568831SAndroid Build Coastguard Worker htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
5235*7c568831SAndroid Build Coastguard Worker int terminate) {
5236*7c568831SAndroid Build Coastguard Worker if ((ctxt == NULL) || (ctxt->input == NULL))
5237*7c568831SAndroid Build Coastguard Worker return(XML_ERR_ARGUMENT);
5238*7c568831SAndroid Build Coastguard Worker if (PARSER_STOPPED(ctxt) != 0)
5239*7c568831SAndroid Build Coastguard Worker return(ctxt->errNo);
5240*7c568831SAndroid Build Coastguard Worker if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5241*7c568831SAndroid Build Coastguard Worker (ctxt->input->buf != NULL)) {
5242*7c568831SAndroid Build Coastguard Worker size_t pos = ctxt->input->cur - ctxt->input->base;
5243*7c568831SAndroid Build Coastguard Worker int res;
5244*7c568831SAndroid Build Coastguard Worker
5245*7c568831SAndroid Build Coastguard Worker res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5246*7c568831SAndroid Build Coastguard Worker xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
5247*7c568831SAndroid Build Coastguard Worker if (res < 0) {
5248*7c568831SAndroid Build Coastguard Worker htmlParseErr(ctxt, ctxt->input->buf->error,
5249*7c568831SAndroid Build Coastguard Worker "xmlParserInputBufferPush failed", NULL, NULL);
5250*7c568831SAndroid Build Coastguard Worker xmlHaltParser(ctxt);
5251*7c568831SAndroid Build Coastguard Worker return (ctxt->errNo);
5252*7c568831SAndroid Build Coastguard Worker }
5253*7c568831SAndroid Build Coastguard Worker }
5254*7c568831SAndroid Build Coastguard Worker htmlParseTryOrFinish(ctxt, terminate);
5255*7c568831SAndroid Build Coastguard Worker if (terminate) {
5256*7c568831SAndroid Build Coastguard Worker if (ctxt->instate != XML_PARSER_EOF) {
5257*7c568831SAndroid Build Coastguard Worker if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5258*7c568831SAndroid Build Coastguard Worker ctxt->sax->endDocument(ctxt->userData);
5259*7c568831SAndroid Build Coastguard Worker }
5260*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_EOF;
5261*7c568831SAndroid Build Coastguard Worker }
5262*7c568831SAndroid Build Coastguard Worker return((xmlParserErrors) ctxt->errNo);
5263*7c568831SAndroid Build Coastguard Worker }
5264*7c568831SAndroid Build Coastguard Worker
5265*7c568831SAndroid Build Coastguard Worker /************************************************************************
5266*7c568831SAndroid Build Coastguard Worker * *
5267*7c568831SAndroid Build Coastguard Worker * User entry points *
5268*7c568831SAndroid Build Coastguard Worker * *
5269*7c568831SAndroid Build Coastguard Worker ************************************************************************/
5270*7c568831SAndroid Build Coastguard Worker
5271*7c568831SAndroid Build Coastguard Worker /**
5272*7c568831SAndroid Build Coastguard Worker * htmlCreatePushParserCtxt:
5273*7c568831SAndroid Build Coastguard Worker * @sax: a SAX handler (optional)
5274*7c568831SAndroid Build Coastguard Worker * @user_data: The user data returned on SAX callbacks (optional)
5275*7c568831SAndroid Build Coastguard Worker * @chunk: a pointer to an array of chars (optional)
5276*7c568831SAndroid Build Coastguard Worker * @size: number of chars in the array
5277*7c568831SAndroid Build Coastguard Worker * @filename: only used for error reporting (optional)
5278*7c568831SAndroid Build Coastguard Worker * @enc: encoding (deprecated, pass XML_CHAR_ENCODING_NONE)
5279*7c568831SAndroid Build Coastguard Worker *
5280*7c568831SAndroid Build Coastguard Worker * Create a parser context for using the HTML parser in push mode.
5281*7c568831SAndroid Build Coastguard Worker *
5282*7c568831SAndroid Build Coastguard Worker * Returns the new parser context or NULL if a memory allocation
5283*7c568831SAndroid Build Coastguard Worker * failed.
5284*7c568831SAndroid Build Coastguard Worker */
5285*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr
htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename,xmlCharEncoding enc)5286*7c568831SAndroid Build Coastguard Worker htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
5287*7c568831SAndroid Build Coastguard Worker const char *chunk, int size, const char *filename,
5288*7c568831SAndroid Build Coastguard Worker xmlCharEncoding enc) {
5289*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr ctxt;
5290*7c568831SAndroid Build Coastguard Worker htmlParserInputPtr input;
5291*7c568831SAndroid Build Coastguard Worker const char *encoding;
5292*7c568831SAndroid Build Coastguard Worker
5293*7c568831SAndroid Build Coastguard Worker ctxt = htmlNewSAXParserCtxt(sax, user_data);
5294*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
5295*7c568831SAndroid Build Coastguard Worker return(NULL);
5296*7c568831SAndroid Build Coastguard Worker
5297*7c568831SAndroid Build Coastguard Worker encoding = xmlGetCharEncodingName(enc);
5298*7c568831SAndroid Build Coastguard Worker input = xmlNewPushInput(filename, chunk, size);
5299*7c568831SAndroid Build Coastguard Worker if (input == NULL) {
5300*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(ctxt);
5301*7c568831SAndroid Build Coastguard Worker return(NULL);
5302*7c568831SAndroid Build Coastguard Worker }
5303*7c568831SAndroid Build Coastguard Worker
5304*7c568831SAndroid Build Coastguard Worker if (inputPush(ctxt, input) < 0) {
5305*7c568831SAndroid Build Coastguard Worker xmlFreeInputStream(input);
5306*7c568831SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
5307*7c568831SAndroid Build Coastguard Worker return(NULL);
5308*7c568831SAndroid Build Coastguard Worker }
5309*7c568831SAndroid Build Coastguard Worker
5310*7c568831SAndroid Build Coastguard Worker if (encoding != NULL)
5311*7c568831SAndroid Build Coastguard Worker xmlSwitchEncodingName(ctxt, encoding);
5312*7c568831SAndroid Build Coastguard Worker
5313*7c568831SAndroid Build Coastguard Worker return(ctxt);
5314*7c568831SAndroid Build Coastguard Worker }
5315*7c568831SAndroid Build Coastguard Worker #endif /* LIBXML_PUSH_ENABLED */
5316*7c568831SAndroid Build Coastguard Worker
5317*7c568831SAndroid Build Coastguard Worker /**
5318*7c568831SAndroid Build Coastguard Worker * htmlSAXParseDoc:
5319*7c568831SAndroid Build Coastguard Worker * @cur: a pointer to an array of xmlChar
5320*7c568831SAndroid Build Coastguard Worker * @encoding: a free form C string describing the HTML document encoding, or NULL
5321*7c568831SAndroid Build Coastguard Worker * @sax: the SAX handler block
5322*7c568831SAndroid Build Coastguard Worker * @userData: if using SAX, this pointer will be provided on callbacks.
5323*7c568831SAndroid Build Coastguard Worker *
5324*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Use htmlNewSAXParserCtxt and htmlCtxtReadDoc.
5325*7c568831SAndroid Build Coastguard Worker *
5326*7c568831SAndroid Build Coastguard Worker * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
5327*7c568831SAndroid Build Coastguard Worker * to handle parse events. If sax is NULL, fallback to the default DOM
5328*7c568831SAndroid Build Coastguard Worker * behavior and return a tree.
5329*7c568831SAndroid Build Coastguard Worker *
5330*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree unless SAX is NULL or the document is
5331*7c568831SAndroid Build Coastguard Worker * not well formed.
5332*7c568831SAndroid Build Coastguard Worker */
5333*7c568831SAndroid Build Coastguard Worker
5334*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlSAXParseDoc(const xmlChar * cur,const char * encoding,htmlSAXHandlerPtr sax,void * userData)5335*7c568831SAndroid Build Coastguard Worker htmlSAXParseDoc(const xmlChar *cur, const char *encoding,
5336*7c568831SAndroid Build Coastguard Worker htmlSAXHandlerPtr sax, void *userData) {
5337*7c568831SAndroid Build Coastguard Worker htmlDocPtr ret;
5338*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr ctxt;
5339*7c568831SAndroid Build Coastguard Worker
5340*7c568831SAndroid Build Coastguard Worker if (cur == NULL)
5341*7c568831SAndroid Build Coastguard Worker return(NULL);
5342*7c568831SAndroid Build Coastguard Worker
5343*7c568831SAndroid Build Coastguard Worker ctxt = htmlCreateDocParserCtxt(cur, NULL, encoding);
5344*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
5345*7c568831SAndroid Build Coastguard Worker return(NULL);
5346*7c568831SAndroid Build Coastguard Worker
5347*7c568831SAndroid Build Coastguard Worker if (sax != NULL) {
5348*7c568831SAndroid Build Coastguard Worker *ctxt->sax = *sax;
5349*7c568831SAndroid Build Coastguard Worker ctxt->userData = userData;
5350*7c568831SAndroid Build Coastguard Worker }
5351*7c568831SAndroid Build Coastguard Worker
5352*7c568831SAndroid Build Coastguard Worker htmlParseDocument(ctxt);
5353*7c568831SAndroid Build Coastguard Worker ret = ctxt->myDoc;
5354*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(ctxt);
5355*7c568831SAndroid Build Coastguard Worker
5356*7c568831SAndroid Build Coastguard Worker return(ret);
5357*7c568831SAndroid Build Coastguard Worker }
5358*7c568831SAndroid Build Coastguard Worker
5359*7c568831SAndroid Build Coastguard Worker /**
5360*7c568831SAndroid Build Coastguard Worker * htmlParseDoc:
5361*7c568831SAndroid Build Coastguard Worker * @cur: a pointer to an array of xmlChar
5362*7c568831SAndroid Build Coastguard Worker * @encoding: the encoding (optional)
5363*7c568831SAndroid Build Coastguard Worker *
5364*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Use htmlReadDoc.
5365*7c568831SAndroid Build Coastguard Worker *
5366*7c568831SAndroid Build Coastguard Worker * Parse an HTML in-memory document and build a tree.
5367*7c568831SAndroid Build Coastguard Worker *
5368*7c568831SAndroid Build Coastguard Worker * This function uses deprecated global parser options.
5369*7c568831SAndroid Build Coastguard Worker *
5370*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
5371*7c568831SAndroid Build Coastguard Worker */
5372*7c568831SAndroid Build Coastguard Worker
5373*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlParseDoc(const xmlChar * cur,const char * encoding)5374*7c568831SAndroid Build Coastguard Worker htmlParseDoc(const xmlChar *cur, const char *encoding) {
5375*7c568831SAndroid Build Coastguard Worker return(htmlSAXParseDoc(cur, encoding, NULL, NULL));
5376*7c568831SAndroid Build Coastguard Worker }
5377*7c568831SAndroid Build Coastguard Worker
5378*7c568831SAndroid Build Coastguard Worker
5379*7c568831SAndroid Build Coastguard Worker /**
5380*7c568831SAndroid Build Coastguard Worker * htmlCreateFileParserCtxt:
5381*7c568831SAndroid Build Coastguard Worker * @filename: the filename
5382*7c568831SAndroid Build Coastguard Worker * @encoding: optional encoding
5383*7c568831SAndroid Build Coastguard Worker *
5384*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Use htmlNewParserCtxt and htmlCtxtReadFile.
5385*7c568831SAndroid Build Coastguard Worker *
5386*7c568831SAndroid Build Coastguard Worker * Create a parser context to read from a file.
5387*7c568831SAndroid Build Coastguard Worker *
5388*7c568831SAndroid Build Coastguard Worker * A non-NULL encoding overrides encoding declarations in the document.
5389*7c568831SAndroid Build Coastguard Worker *
5390*7c568831SAndroid Build Coastguard Worker * Automatic support for ZLIB/Compress compressed document is provided
5391*7c568831SAndroid Build Coastguard Worker * by default if found at compile-time.
5392*7c568831SAndroid Build Coastguard Worker *
5393*7c568831SAndroid Build Coastguard Worker * Returns the new parser context or NULL if a memory allocation failed.
5394*7c568831SAndroid Build Coastguard Worker */
5395*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr
htmlCreateFileParserCtxt(const char * filename,const char * encoding)5396*7c568831SAndroid Build Coastguard Worker htmlCreateFileParserCtxt(const char *filename, const char *encoding)
5397*7c568831SAndroid Build Coastguard Worker {
5398*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr ctxt;
5399*7c568831SAndroid Build Coastguard Worker htmlParserInputPtr input;
5400*7c568831SAndroid Build Coastguard Worker
5401*7c568831SAndroid Build Coastguard Worker if (filename == NULL)
5402*7c568831SAndroid Build Coastguard Worker return(NULL);
5403*7c568831SAndroid Build Coastguard Worker
5404*7c568831SAndroid Build Coastguard Worker ctxt = htmlNewParserCtxt();
5405*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL) {
5406*7c568831SAndroid Build Coastguard Worker return(NULL);
5407*7c568831SAndroid Build Coastguard Worker }
5408*7c568831SAndroid Build Coastguard Worker
5409*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
5410*7c568831SAndroid Build Coastguard Worker if (input == NULL) {
5411*7c568831SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
5412*7c568831SAndroid Build Coastguard Worker return(NULL);
5413*7c568831SAndroid Build Coastguard Worker }
5414*7c568831SAndroid Build Coastguard Worker if (inputPush(ctxt, input) < 0) {
5415*7c568831SAndroid Build Coastguard Worker xmlFreeInputStream(input);
5416*7c568831SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
5417*7c568831SAndroid Build Coastguard Worker return(NULL);
5418*7c568831SAndroid Build Coastguard Worker }
5419*7c568831SAndroid Build Coastguard Worker
5420*7c568831SAndroid Build Coastguard Worker return(ctxt);
5421*7c568831SAndroid Build Coastguard Worker }
5422*7c568831SAndroid Build Coastguard Worker
5423*7c568831SAndroid Build Coastguard Worker /**
5424*7c568831SAndroid Build Coastguard Worker * htmlSAXParseFile:
5425*7c568831SAndroid Build Coastguard Worker * @filename: the filename
5426*7c568831SAndroid Build Coastguard Worker * @encoding: encoding (optional)
5427*7c568831SAndroid Build Coastguard Worker * @sax: the SAX handler block
5428*7c568831SAndroid Build Coastguard Worker * @userData: if using SAX, this pointer will be provided on callbacks.
5429*7c568831SAndroid Build Coastguard Worker *
5430*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Use htmlNewSAXParserCtxt and htmlCtxtReadFile.
5431*7c568831SAndroid Build Coastguard Worker *
5432*7c568831SAndroid Build Coastguard Worker * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
5433*7c568831SAndroid Build Coastguard Worker * compressed document is provided by default if found at compile-time.
5434*7c568831SAndroid Build Coastguard Worker * It use the given SAX function block to handle the parsing callback.
5435*7c568831SAndroid Build Coastguard Worker * If sax is NULL, fallback to the default DOM tree building routines.
5436*7c568831SAndroid Build Coastguard Worker *
5437*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree unless SAX is NULL or the document is
5438*7c568831SAndroid Build Coastguard Worker * not well formed.
5439*7c568831SAndroid Build Coastguard Worker */
5440*7c568831SAndroid Build Coastguard Worker
5441*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlSAXParseFile(const char * filename,const char * encoding,htmlSAXHandlerPtr sax,void * userData)5442*7c568831SAndroid Build Coastguard Worker htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr sax,
5443*7c568831SAndroid Build Coastguard Worker void *userData) {
5444*7c568831SAndroid Build Coastguard Worker htmlDocPtr ret;
5445*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr ctxt;
5446*7c568831SAndroid Build Coastguard Worker htmlSAXHandlerPtr oldsax = NULL;
5447*7c568831SAndroid Build Coastguard Worker
5448*7c568831SAndroid Build Coastguard Worker ctxt = htmlCreateFileParserCtxt(filename, encoding);
5449*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL) return(NULL);
5450*7c568831SAndroid Build Coastguard Worker if (sax != NULL) {
5451*7c568831SAndroid Build Coastguard Worker oldsax = ctxt->sax;
5452*7c568831SAndroid Build Coastguard Worker ctxt->sax = sax;
5453*7c568831SAndroid Build Coastguard Worker ctxt->userData = userData;
5454*7c568831SAndroid Build Coastguard Worker }
5455*7c568831SAndroid Build Coastguard Worker
5456*7c568831SAndroid Build Coastguard Worker htmlParseDocument(ctxt);
5457*7c568831SAndroid Build Coastguard Worker
5458*7c568831SAndroid Build Coastguard Worker ret = ctxt->myDoc;
5459*7c568831SAndroid Build Coastguard Worker if (sax != NULL) {
5460*7c568831SAndroid Build Coastguard Worker ctxt->sax = oldsax;
5461*7c568831SAndroid Build Coastguard Worker ctxt->userData = NULL;
5462*7c568831SAndroid Build Coastguard Worker }
5463*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(ctxt);
5464*7c568831SAndroid Build Coastguard Worker
5465*7c568831SAndroid Build Coastguard Worker return(ret);
5466*7c568831SAndroid Build Coastguard Worker }
5467*7c568831SAndroid Build Coastguard Worker
5468*7c568831SAndroid Build Coastguard Worker /**
5469*7c568831SAndroid Build Coastguard Worker * htmlParseFile:
5470*7c568831SAndroid Build Coastguard Worker * @filename: the filename
5471*7c568831SAndroid Build Coastguard Worker * @encoding: encoding (optional)
5472*7c568831SAndroid Build Coastguard Worker *
5473*7c568831SAndroid Build Coastguard Worker * Parse an HTML file and build a tree.
5474*7c568831SAndroid Build Coastguard Worker *
5475*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
5476*7c568831SAndroid Build Coastguard Worker */
5477*7c568831SAndroid Build Coastguard Worker
5478*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlParseFile(const char * filename,const char * encoding)5479*7c568831SAndroid Build Coastguard Worker htmlParseFile(const char *filename, const char *encoding) {
5480*7c568831SAndroid Build Coastguard Worker return(htmlSAXParseFile(filename, encoding, NULL, NULL));
5481*7c568831SAndroid Build Coastguard Worker }
5482*7c568831SAndroid Build Coastguard Worker
5483*7c568831SAndroid Build Coastguard Worker /**
5484*7c568831SAndroid Build Coastguard Worker * htmlHandleOmittedElem:
5485*7c568831SAndroid Build Coastguard Worker * @val: int 0 or 1
5486*7c568831SAndroid Build Coastguard Worker *
5487*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Use HTML_PARSE_NOIMPLIED
5488*7c568831SAndroid Build Coastguard Worker *
5489*7c568831SAndroid Build Coastguard Worker * Set and return the previous value for handling HTML omitted tags.
5490*7c568831SAndroid Build Coastguard Worker *
5491*7c568831SAndroid Build Coastguard Worker * Returns the last value for 0 for no handling, 1 for auto insertion.
5492*7c568831SAndroid Build Coastguard Worker */
5493*7c568831SAndroid Build Coastguard Worker
5494*7c568831SAndroid Build Coastguard Worker int
htmlHandleOmittedElem(int val)5495*7c568831SAndroid Build Coastguard Worker htmlHandleOmittedElem(int val) {
5496*7c568831SAndroid Build Coastguard Worker int old = htmlOmittedDefaultValue;
5497*7c568831SAndroid Build Coastguard Worker
5498*7c568831SAndroid Build Coastguard Worker htmlOmittedDefaultValue = val;
5499*7c568831SAndroid Build Coastguard Worker return(old);
5500*7c568831SAndroid Build Coastguard Worker }
5501*7c568831SAndroid Build Coastguard Worker
5502*7c568831SAndroid Build Coastguard Worker /**
5503*7c568831SAndroid Build Coastguard Worker * htmlElementAllowedHere:
5504*7c568831SAndroid Build Coastguard Worker * @parent: HTML parent element
5505*7c568831SAndroid Build Coastguard Worker * @elt: HTML element
5506*7c568831SAndroid Build Coastguard Worker *
5507*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Don't use.
5508*7c568831SAndroid Build Coastguard Worker *
5509*7c568831SAndroid Build Coastguard Worker * Returns 1
5510*7c568831SAndroid Build Coastguard Worker */
5511*7c568831SAndroid Build Coastguard Worker int
htmlElementAllowedHere(const htmlElemDesc * parent ATTRIBUTE_UNUSED,const xmlChar * elt ATTRIBUTE_UNUSED)5512*7c568831SAndroid Build Coastguard Worker htmlElementAllowedHere(const htmlElemDesc* parent ATTRIBUTE_UNUSED,
5513*7c568831SAndroid Build Coastguard Worker const xmlChar* elt ATTRIBUTE_UNUSED) {
5514*7c568831SAndroid Build Coastguard Worker return(1);
5515*7c568831SAndroid Build Coastguard Worker }
5516*7c568831SAndroid Build Coastguard Worker
5517*7c568831SAndroid Build Coastguard Worker /**
5518*7c568831SAndroid Build Coastguard Worker * htmlElementStatusHere:
5519*7c568831SAndroid Build Coastguard Worker * @parent: HTML parent element
5520*7c568831SAndroid Build Coastguard Worker * @elt: HTML element
5521*7c568831SAndroid Build Coastguard Worker *
5522*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Don't use.
5523*7c568831SAndroid Build Coastguard Worker *
5524*7c568831SAndroid Build Coastguard Worker * Returns HTML_VALID
5525*7c568831SAndroid Build Coastguard Worker */
5526*7c568831SAndroid Build Coastguard Worker htmlStatus
htmlElementStatusHere(const htmlElemDesc * parent ATTRIBUTE_UNUSED,const htmlElemDesc * elt ATTRIBUTE_UNUSED)5527*7c568831SAndroid Build Coastguard Worker htmlElementStatusHere(const htmlElemDesc* parent ATTRIBUTE_UNUSED,
5528*7c568831SAndroid Build Coastguard Worker const htmlElemDesc* elt ATTRIBUTE_UNUSED) {
5529*7c568831SAndroid Build Coastguard Worker return(HTML_VALID);
5530*7c568831SAndroid Build Coastguard Worker }
5531*7c568831SAndroid Build Coastguard Worker
5532*7c568831SAndroid Build Coastguard Worker /**
5533*7c568831SAndroid Build Coastguard Worker * htmlAttrAllowed:
5534*7c568831SAndroid Build Coastguard Worker * @elt: HTML element
5535*7c568831SAndroid Build Coastguard Worker * @attr: HTML attribute
5536*7c568831SAndroid Build Coastguard Worker * @legacy: whether to allow deprecated attributes
5537*7c568831SAndroid Build Coastguard Worker *
5538*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Don't use.
5539*7c568831SAndroid Build Coastguard Worker *
5540*7c568831SAndroid Build Coastguard Worker * Returns HTML_VALID
5541*7c568831SAndroid Build Coastguard Worker */
5542*7c568831SAndroid Build Coastguard Worker htmlStatus
htmlAttrAllowed(const htmlElemDesc * elt ATTRIBUTE_UNUSED,const xmlChar * attr ATTRIBUTE_UNUSED,int legacy ATTRIBUTE_UNUSED)5543*7c568831SAndroid Build Coastguard Worker htmlAttrAllowed(const htmlElemDesc* elt ATTRIBUTE_UNUSED,
5544*7c568831SAndroid Build Coastguard Worker const xmlChar* attr ATTRIBUTE_UNUSED,
5545*7c568831SAndroid Build Coastguard Worker int legacy ATTRIBUTE_UNUSED) {
5546*7c568831SAndroid Build Coastguard Worker return(HTML_VALID);
5547*7c568831SAndroid Build Coastguard Worker }
5548*7c568831SAndroid Build Coastguard Worker
5549*7c568831SAndroid Build Coastguard Worker /**
5550*7c568831SAndroid Build Coastguard Worker * htmlNodeStatus:
5551*7c568831SAndroid Build Coastguard Worker * @node: an htmlNodePtr in a tree
5552*7c568831SAndroid Build Coastguard Worker * @legacy: whether to allow deprecated elements (YES is faster here
5553*7c568831SAndroid Build Coastguard Worker * for Element nodes)
5554*7c568831SAndroid Build Coastguard Worker *
5555*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Don't use.
5556*7c568831SAndroid Build Coastguard Worker *
5557*7c568831SAndroid Build Coastguard Worker * Returns HTML_VALID
5558*7c568831SAndroid Build Coastguard Worker */
5559*7c568831SAndroid Build Coastguard Worker htmlStatus
htmlNodeStatus(htmlNodePtr node ATTRIBUTE_UNUSED,int legacy ATTRIBUTE_UNUSED)5560*7c568831SAndroid Build Coastguard Worker htmlNodeStatus(htmlNodePtr node ATTRIBUTE_UNUSED,
5561*7c568831SAndroid Build Coastguard Worker int legacy ATTRIBUTE_UNUSED) {
5562*7c568831SAndroid Build Coastguard Worker return(HTML_VALID);
5563*7c568831SAndroid Build Coastguard Worker }
5564*7c568831SAndroid Build Coastguard Worker
5565*7c568831SAndroid Build Coastguard Worker /************************************************************************
5566*7c568831SAndroid Build Coastguard Worker * *
5567*7c568831SAndroid Build Coastguard Worker * New set (2.6.0) of simpler and more flexible APIs *
5568*7c568831SAndroid Build Coastguard Worker * *
5569*7c568831SAndroid Build Coastguard Worker ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded. A NULL @str is ignored, and when "dict" is NULL
 * the string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it forms exactly
 * one statement: the caller writes the terminating semicolon and the
 * macro can be used as the body of an unbraced if/else without
 * capturing the else branch. Note that @str is evaluated more than
 * once, so it must not have side effects.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
5581*7c568831SAndroid Build Coastguard Worker
5582*7c568831SAndroid Build Coastguard Worker /**
5583*7c568831SAndroid Build Coastguard Worker * htmlCtxtReset:
5584*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
5585*7c568831SAndroid Build Coastguard Worker *
5586*7c568831SAndroid Build Coastguard Worker * Reset a parser context
5587*7c568831SAndroid Build Coastguard Worker */
5588*7c568831SAndroid Build Coastguard Worker void
htmlCtxtReset(htmlParserCtxtPtr ctxt)5589*7c568831SAndroid Build Coastguard Worker htmlCtxtReset(htmlParserCtxtPtr ctxt)
5590*7c568831SAndroid Build Coastguard Worker {
5591*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
5592*7c568831SAndroid Build Coastguard Worker xmlDictPtr dict;
5593*7c568831SAndroid Build Coastguard Worker
5594*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
5595*7c568831SAndroid Build Coastguard Worker return;
5596*7c568831SAndroid Build Coastguard Worker
5597*7c568831SAndroid Build Coastguard Worker dict = ctxt->dict;
5598*7c568831SAndroid Build Coastguard Worker
5599*7c568831SAndroid Build Coastguard Worker while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
5600*7c568831SAndroid Build Coastguard Worker xmlFreeInputStream(input);
5601*7c568831SAndroid Build Coastguard Worker }
5602*7c568831SAndroid Build Coastguard Worker ctxt->inputNr = 0;
5603*7c568831SAndroid Build Coastguard Worker ctxt->input = NULL;
5604*7c568831SAndroid Build Coastguard Worker
5605*7c568831SAndroid Build Coastguard Worker ctxt->spaceNr = 0;
5606*7c568831SAndroid Build Coastguard Worker if (ctxt->spaceTab != NULL) {
5607*7c568831SAndroid Build Coastguard Worker ctxt->spaceTab[0] = -1;
5608*7c568831SAndroid Build Coastguard Worker ctxt->space = &ctxt->spaceTab[0];
5609*7c568831SAndroid Build Coastguard Worker } else {
5610*7c568831SAndroid Build Coastguard Worker ctxt->space = NULL;
5611*7c568831SAndroid Build Coastguard Worker }
5612*7c568831SAndroid Build Coastguard Worker
5613*7c568831SAndroid Build Coastguard Worker
5614*7c568831SAndroid Build Coastguard Worker ctxt->nodeNr = 0;
5615*7c568831SAndroid Build Coastguard Worker ctxt->node = NULL;
5616*7c568831SAndroid Build Coastguard Worker
5617*7c568831SAndroid Build Coastguard Worker ctxt->nameNr = 0;
5618*7c568831SAndroid Build Coastguard Worker ctxt->name = NULL;
5619*7c568831SAndroid Build Coastguard Worker
5620*7c568831SAndroid Build Coastguard Worker ctxt->nsNr = 0;
5621*7c568831SAndroid Build Coastguard Worker
5622*7c568831SAndroid Build Coastguard Worker DICT_FREE(ctxt->version);
5623*7c568831SAndroid Build Coastguard Worker ctxt->version = NULL;
5624*7c568831SAndroid Build Coastguard Worker DICT_FREE(ctxt->encoding);
5625*7c568831SAndroid Build Coastguard Worker ctxt->encoding = NULL;
5626*7c568831SAndroid Build Coastguard Worker DICT_FREE(ctxt->extSubURI);
5627*7c568831SAndroid Build Coastguard Worker ctxt->extSubURI = NULL;
5628*7c568831SAndroid Build Coastguard Worker DICT_FREE(ctxt->extSubSystem);
5629*7c568831SAndroid Build Coastguard Worker ctxt->extSubSystem = NULL;
5630*7c568831SAndroid Build Coastguard Worker
5631*7c568831SAndroid Build Coastguard Worker if (ctxt->directory != NULL) {
5632*7c568831SAndroid Build Coastguard Worker xmlFree(ctxt->directory);
5633*7c568831SAndroid Build Coastguard Worker ctxt->directory = NULL;
5634*7c568831SAndroid Build Coastguard Worker }
5635*7c568831SAndroid Build Coastguard Worker
5636*7c568831SAndroid Build Coastguard Worker if (ctxt->myDoc != NULL)
5637*7c568831SAndroid Build Coastguard Worker xmlFreeDoc(ctxt->myDoc);
5638*7c568831SAndroid Build Coastguard Worker ctxt->myDoc = NULL;
5639*7c568831SAndroid Build Coastguard Worker
5640*7c568831SAndroid Build Coastguard Worker ctxt->standalone = -1;
5641*7c568831SAndroid Build Coastguard Worker ctxt->hasExternalSubset = 0;
5642*7c568831SAndroid Build Coastguard Worker ctxt->hasPErefs = 0;
5643*7c568831SAndroid Build Coastguard Worker ctxt->html = 1;
5644*7c568831SAndroid Build Coastguard Worker ctxt->instate = XML_PARSER_START;
5645*7c568831SAndroid Build Coastguard Worker
5646*7c568831SAndroid Build Coastguard Worker ctxt->wellFormed = 1;
5647*7c568831SAndroid Build Coastguard Worker ctxt->nsWellFormed = 1;
5648*7c568831SAndroid Build Coastguard Worker ctxt->disableSAX = 0;
5649*7c568831SAndroid Build Coastguard Worker ctxt->valid = 1;
5650*7c568831SAndroid Build Coastguard Worker ctxt->vctxt.userData = ctxt;
5651*7c568831SAndroid Build Coastguard Worker ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
5652*7c568831SAndroid Build Coastguard Worker ctxt->vctxt.error = xmlParserValidityError;
5653*7c568831SAndroid Build Coastguard Worker ctxt->vctxt.warning = xmlParserValidityWarning;
5654*7c568831SAndroid Build Coastguard Worker ctxt->record_info = 0;
5655*7c568831SAndroid Build Coastguard Worker ctxt->checkIndex = 0;
5656*7c568831SAndroid Build Coastguard Worker ctxt->endCheckState = 0;
5657*7c568831SAndroid Build Coastguard Worker ctxt->inSubset = 0;
5658*7c568831SAndroid Build Coastguard Worker ctxt->errNo = XML_ERR_OK;
5659*7c568831SAndroid Build Coastguard Worker ctxt->depth = 0;
5660*7c568831SAndroid Build Coastguard Worker ctxt->catalogs = NULL;
5661*7c568831SAndroid Build Coastguard Worker xmlInitNodeInfoSeq(&ctxt->node_seq);
5662*7c568831SAndroid Build Coastguard Worker
5663*7c568831SAndroid Build Coastguard Worker if (ctxt->attsDefault != NULL) {
5664*7c568831SAndroid Build Coastguard Worker xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
5665*7c568831SAndroid Build Coastguard Worker ctxt->attsDefault = NULL;
5666*7c568831SAndroid Build Coastguard Worker }
5667*7c568831SAndroid Build Coastguard Worker if (ctxt->attsSpecial != NULL) {
5668*7c568831SAndroid Build Coastguard Worker xmlHashFree(ctxt->attsSpecial, NULL);
5669*7c568831SAndroid Build Coastguard Worker ctxt->attsSpecial = NULL;
5670*7c568831SAndroid Build Coastguard Worker }
5671*7c568831SAndroid Build Coastguard Worker
5672*7c568831SAndroid Build Coastguard Worker ctxt->nbErrors = 0;
5673*7c568831SAndroid Build Coastguard Worker ctxt->nbWarnings = 0;
5674*7c568831SAndroid Build Coastguard Worker if (ctxt->lastError.code != XML_ERR_OK)
5675*7c568831SAndroid Build Coastguard Worker xmlResetError(&ctxt->lastError);
5676*7c568831SAndroid Build Coastguard Worker }
5677*7c568831SAndroid Build Coastguard Worker
5678*7c568831SAndroid Build Coastguard Worker static int
htmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt,int options,int keepMask)5679*7c568831SAndroid Build Coastguard Worker htmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
5680*7c568831SAndroid Build Coastguard Worker {
5681*7c568831SAndroid Build Coastguard Worker int allMask;
5682*7c568831SAndroid Build Coastguard Worker
5683*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
5684*7c568831SAndroid Build Coastguard Worker return(-1);
5685*7c568831SAndroid Build Coastguard Worker
5686*7c568831SAndroid Build Coastguard Worker allMask = HTML_PARSE_RECOVER |
5687*7c568831SAndroid Build Coastguard Worker HTML_PARSE_HTML5 |
5688*7c568831SAndroid Build Coastguard Worker HTML_PARSE_NODEFDTD |
5689*7c568831SAndroid Build Coastguard Worker HTML_PARSE_NOERROR |
5690*7c568831SAndroid Build Coastguard Worker HTML_PARSE_NOWARNING |
5691*7c568831SAndroid Build Coastguard Worker HTML_PARSE_PEDANTIC |
5692*7c568831SAndroid Build Coastguard Worker HTML_PARSE_NOBLANKS |
5693*7c568831SAndroid Build Coastguard Worker HTML_PARSE_NONET |
5694*7c568831SAndroid Build Coastguard Worker HTML_PARSE_NOIMPLIED |
5695*7c568831SAndroid Build Coastguard Worker HTML_PARSE_COMPACT |
5696*7c568831SAndroid Build Coastguard Worker HTML_PARSE_HUGE |
5697*7c568831SAndroid Build Coastguard Worker HTML_PARSE_IGNORE_ENC |
5698*7c568831SAndroid Build Coastguard Worker HTML_PARSE_BIG_LINES;
5699*7c568831SAndroid Build Coastguard Worker
5700*7c568831SAndroid Build Coastguard Worker ctxt->options = (ctxt->options & keepMask) | (options & allMask);
5701*7c568831SAndroid Build Coastguard Worker
5702*7c568831SAndroid Build Coastguard Worker /*
5703*7c568831SAndroid Build Coastguard Worker * For some options, struct members are historically the source
5704*7c568831SAndroid Build Coastguard Worker * of truth. See xmlCtxtSetOptionsInternal.
5705*7c568831SAndroid Build Coastguard Worker */
5706*7c568831SAndroid Build Coastguard Worker ctxt->keepBlanks = (options & HTML_PARSE_NOBLANKS) ? 0 : 1;
5707*7c568831SAndroid Build Coastguard Worker
5708*7c568831SAndroid Build Coastguard Worker /*
5709*7c568831SAndroid Build Coastguard Worker * Changing SAX callbacks is a bad idea. This should be fixed.
5710*7c568831SAndroid Build Coastguard Worker */
5711*7c568831SAndroid Build Coastguard Worker if (options & HTML_PARSE_NOBLANKS) {
5712*7c568831SAndroid Build Coastguard Worker ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
5713*7c568831SAndroid Build Coastguard Worker }
5714*7c568831SAndroid Build Coastguard Worker if (options & HTML_PARSE_HUGE) {
5715*7c568831SAndroid Build Coastguard Worker if (ctxt->dict != NULL)
5716*7c568831SAndroid Build Coastguard Worker xmlDictSetLimit(ctxt->dict, 0);
5717*7c568831SAndroid Build Coastguard Worker }
5718*7c568831SAndroid Build Coastguard Worker
5719*7c568831SAndroid Build Coastguard Worker /*
5720*7c568831SAndroid Build Coastguard Worker * It would be useful to allow this feature.
5721*7c568831SAndroid Build Coastguard Worker */
5722*7c568831SAndroid Build Coastguard Worker ctxt->dictNames = 0;
5723*7c568831SAndroid Build Coastguard Worker
5724*7c568831SAndroid Build Coastguard Worker ctxt->linenumbers = 1;
5725*7c568831SAndroid Build Coastguard Worker
5726*7c568831SAndroid Build Coastguard Worker return(options & ~allMask);
5727*7c568831SAndroid Build Coastguard Worker }
5728*7c568831SAndroid Build Coastguard Worker
5729*7c568831SAndroid Build Coastguard Worker /**
5730*7c568831SAndroid Build Coastguard Worker * htmlCtxtSetOptions:
5731*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
 * @options:  a bitmask of htmlParserOption values
5733*7c568831SAndroid Build Coastguard Worker *
5734*7c568831SAndroid Build Coastguard Worker * Applies the options to the parser context. Unset options are
5735*7c568831SAndroid Build Coastguard Worker * cleared.
5736*7c568831SAndroid Build Coastguard Worker *
5737*7c568831SAndroid Build Coastguard Worker * Available since 2.14.0. With older versions, you can use
5738*7c568831SAndroid Build Coastguard Worker * htmlCtxtUseOptions.
5739*7c568831SAndroid Build Coastguard Worker *
5740*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_RECOVER
5741*7c568831SAndroid Build Coastguard Worker *
5742*7c568831SAndroid Build Coastguard Worker * No effect as of 2.14.0.
5743*7c568831SAndroid Build Coastguard Worker *
5744*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_HTML5
5745*7c568831SAndroid Build Coastguard Worker *
5746*7c568831SAndroid Build Coastguard Worker * Make the tokenizer emit a SAX callback for each token. This results
5747*7c568831SAndroid Build Coastguard Worker * in unbalanced invocations of startElement and endElement.
5748*7c568831SAndroid Build Coastguard Worker *
5749*7c568831SAndroid Build Coastguard Worker * For now, this is only usable with custom SAX callbacks.
5750*7c568831SAndroid Build Coastguard Worker *
5751*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NODEFDTD
5752*7c568831SAndroid Build Coastguard Worker *
5753*7c568831SAndroid Build Coastguard Worker * Do not default to a doctype if none was found.
5754*7c568831SAndroid Build Coastguard Worker *
5755*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NOERROR
5756*7c568831SAndroid Build Coastguard Worker *
5757*7c568831SAndroid Build Coastguard Worker * Disable error and warning reports to the error handlers.
5758*7c568831SAndroid Build Coastguard Worker * Errors are still accessible with xmlCtxtGetLastError.
5759*7c568831SAndroid Build Coastguard Worker *
5760*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NOWARNING
5761*7c568831SAndroid Build Coastguard Worker *
5762*7c568831SAndroid Build Coastguard Worker * Disable warning reports.
5763*7c568831SAndroid Build Coastguard Worker *
5764*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_PEDANTIC
5765*7c568831SAndroid Build Coastguard Worker *
5766*7c568831SAndroid Build Coastguard Worker * No effect.
5767*7c568831SAndroid Build Coastguard Worker *
5768*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NOBLANKS
5769*7c568831SAndroid Build Coastguard Worker *
5770*7c568831SAndroid Build Coastguard Worker * Remove some text nodes containing only whitespace from the
5771*7c568831SAndroid Build Coastguard Worker * result document. Which nodes are removed depends on a conservative
5772*7c568831SAndroid Build Coastguard Worker * heuristic. The reindenting feature of the serialization code relies
5773*7c568831SAndroid Build Coastguard Worker * on this option to be set when parsing. Use of this option is
5774*7c568831SAndroid Build Coastguard Worker * DISCOURAGED.
5775*7c568831SAndroid Build Coastguard Worker *
5776*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NONET
5777*7c568831SAndroid Build Coastguard Worker *
5778*7c568831SAndroid Build Coastguard Worker * No effect.
5779*7c568831SAndroid Build Coastguard Worker *
5780*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NOIMPLIED
5781*7c568831SAndroid Build Coastguard Worker *
5782*7c568831SAndroid Build Coastguard Worker * Do not add implied html, head or body elements.
5783*7c568831SAndroid Build Coastguard Worker *
5784*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_COMPACT
5785*7c568831SAndroid Build Coastguard Worker *
5786*7c568831SAndroid Build Coastguard Worker * Store small strings directly in the node struct to save
5787*7c568831SAndroid Build Coastguard Worker * memory.
5788*7c568831SAndroid Build Coastguard Worker *
5789*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_HUGE
5790*7c568831SAndroid Build Coastguard Worker *
5791*7c568831SAndroid Build Coastguard Worker * Relax some internal limits.
5792*7c568831SAndroid Build Coastguard Worker *
 * Available since 2.14.0. XML_PARSE_HUGE works with older
 * versions.
5795*7c568831SAndroid Build Coastguard Worker *
5796*7c568831SAndroid Build Coastguard Worker * Maximum size of text nodes, tags, comments, CDATA sections
5797*7c568831SAndroid Build Coastguard Worker *
5798*7c568831SAndroid Build Coastguard Worker * normal: 10M
5799*7c568831SAndroid Build Coastguard Worker * huge: 1B
5800*7c568831SAndroid Build Coastguard Worker *
5801*7c568831SAndroid Build Coastguard Worker * Maximum size of names, system literals, pubid literals
5802*7c568831SAndroid Build Coastguard Worker *
5803*7c568831SAndroid Build Coastguard Worker * normal: 50K
5804*7c568831SAndroid Build Coastguard Worker * huge: 10M
5805*7c568831SAndroid Build Coastguard Worker *
5806*7c568831SAndroid Build Coastguard Worker * Maximum nesting depth of elements
5807*7c568831SAndroid Build Coastguard Worker *
5808*7c568831SAndroid Build Coastguard Worker * normal: 256
5809*7c568831SAndroid Build Coastguard Worker * huge: 2048
5810*7c568831SAndroid Build Coastguard Worker *
5811*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_IGNORE_ENC
5812*7c568831SAndroid Build Coastguard Worker *
5813*7c568831SAndroid Build Coastguard Worker * Ignore the encoding in the HTML declaration. This option is
5814*7c568831SAndroid Build Coastguard Worker * mostly unneeded these days. The only effect is to enforce
5815*7c568831SAndroid Build Coastguard Worker * UTF-8 decoding of ASCII-like data.
5816*7c568831SAndroid Build Coastguard Worker *
5817*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_BIG_LINES
5818*7c568831SAndroid Build Coastguard Worker *
5819*7c568831SAndroid Build Coastguard Worker * Enable reporting of line numbers larger than 65535.
5820*7c568831SAndroid Build Coastguard Worker *
5821*7c568831SAndroid Build Coastguard Worker * Available since 2.14.0.
5822*7c568831SAndroid Build Coastguard Worker *
5823*7c568831SAndroid Build Coastguard Worker * Returns 0 in case of success, the set of unknown or unimplemented options
5824*7c568831SAndroid Build Coastguard Worker * in case of error.
5825*7c568831SAndroid Build Coastguard Worker */
5826*7c568831SAndroid Build Coastguard Worker int
htmlCtxtSetOptions(xmlParserCtxtPtr ctxt,int options)5827*7c568831SAndroid Build Coastguard Worker htmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
5828*7c568831SAndroid Build Coastguard Worker {
5829*7c568831SAndroid Build Coastguard Worker return(htmlCtxtSetOptionsInternal(ctxt, options, 0));
5830*7c568831SAndroid Build Coastguard Worker }
5831*7c568831SAndroid Build Coastguard Worker
5832*7c568831SAndroid Build Coastguard Worker /**
5833*7c568831SAndroid Build Coastguard Worker * htmlCtxtUseOptions:
5834*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
5835*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOption(s)
5836*7c568831SAndroid Build Coastguard Worker *
5837*7c568831SAndroid Build Coastguard Worker * DEPRECATED: Use htmlCtxtSetOptions.
5838*7c568831SAndroid Build Coastguard Worker *
5839*7c568831SAndroid Build Coastguard Worker * Applies the options to the parser context. The following options
5840*7c568831SAndroid Build Coastguard Worker * are never cleared and can only be enabled:
5841*7c568831SAndroid Build Coastguard Worker *
5842*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NODEFDTD
5843*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NOERROR
5844*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NOWARNING
5845*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_NOIMPLIED
5846*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_COMPACT
5847*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_HUGE
5848*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_IGNORE_ENC
5849*7c568831SAndroid Build Coastguard Worker * HTML_PARSE_BIG_LINES
5850*7c568831SAndroid Build Coastguard Worker *
5851*7c568831SAndroid Build Coastguard Worker * Returns 0 in case of success, the set of unknown or unimplemented options
5852*7c568831SAndroid Build Coastguard Worker * in case of error.
5853*7c568831SAndroid Build Coastguard Worker */
5854*7c568831SAndroid Build Coastguard Worker int
htmlCtxtUseOptions(htmlParserCtxtPtr ctxt,int options)5855*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
5856*7c568831SAndroid Build Coastguard Worker {
5857*7c568831SAndroid Build Coastguard Worker int keepMask;
5858*7c568831SAndroid Build Coastguard Worker
5859*7c568831SAndroid Build Coastguard Worker /*
5860*7c568831SAndroid Build Coastguard Worker * For historic reasons, some options can only be enabled.
5861*7c568831SAndroid Build Coastguard Worker */
5862*7c568831SAndroid Build Coastguard Worker keepMask = HTML_PARSE_NODEFDTD |
5863*7c568831SAndroid Build Coastguard Worker HTML_PARSE_NOERROR |
5864*7c568831SAndroid Build Coastguard Worker HTML_PARSE_NOWARNING |
5865*7c568831SAndroid Build Coastguard Worker HTML_PARSE_NOIMPLIED |
5866*7c568831SAndroid Build Coastguard Worker HTML_PARSE_COMPACT |
5867*7c568831SAndroid Build Coastguard Worker HTML_PARSE_HUGE |
5868*7c568831SAndroid Build Coastguard Worker HTML_PARSE_IGNORE_ENC |
5869*7c568831SAndroid Build Coastguard Worker HTML_PARSE_BIG_LINES;
5870*7c568831SAndroid Build Coastguard Worker
5871*7c568831SAndroid Build Coastguard Worker return(htmlCtxtSetOptionsInternal(ctxt, options, keepMask));
5872*7c568831SAndroid Build Coastguard Worker }
5873*7c568831SAndroid Build Coastguard Worker
5874*7c568831SAndroid Build Coastguard Worker /**
5875*7c568831SAndroid Build Coastguard Worker * htmlCtxtParseDocument:
5876*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
5877*7c568831SAndroid Build Coastguard Worker * @input: parser input
5878*7c568831SAndroid Build Coastguard Worker *
5879*7c568831SAndroid Build Coastguard Worker * Parse an HTML document and return the resulting document tree.
5880*7c568831SAndroid Build Coastguard Worker *
5881*7c568831SAndroid Build Coastguard Worker * Available since 2.13.0.
5882*7c568831SAndroid Build Coastguard Worker *
5883*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree or NULL
5884*7c568831SAndroid Build Coastguard Worker */
5885*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlCtxtParseDocument(htmlParserCtxtPtr ctxt,xmlParserInputPtr input)5886*7c568831SAndroid Build Coastguard Worker htmlCtxtParseDocument(htmlParserCtxtPtr ctxt, xmlParserInputPtr input)
5887*7c568831SAndroid Build Coastguard Worker {
5888*7c568831SAndroid Build Coastguard Worker htmlDocPtr ret;
5889*7c568831SAndroid Build Coastguard Worker
5890*7c568831SAndroid Build Coastguard Worker if ((ctxt == NULL) || (input == NULL))
5891*7c568831SAndroid Build Coastguard Worker return(NULL);
5892*7c568831SAndroid Build Coastguard Worker
5893*7c568831SAndroid Build Coastguard Worker /* assert(ctxt->inputNr == 0); */
5894*7c568831SAndroid Build Coastguard Worker while (ctxt->inputNr > 0)
5895*7c568831SAndroid Build Coastguard Worker xmlFreeInputStream(inputPop(ctxt));
5896*7c568831SAndroid Build Coastguard Worker
5897*7c568831SAndroid Build Coastguard Worker if (inputPush(ctxt, input) < 0) {
5898*7c568831SAndroid Build Coastguard Worker xmlFreeInputStream(input);
5899*7c568831SAndroid Build Coastguard Worker return(NULL);
5900*7c568831SAndroid Build Coastguard Worker }
5901*7c568831SAndroid Build Coastguard Worker
5902*7c568831SAndroid Build Coastguard Worker ctxt->html = 1;
5903*7c568831SAndroid Build Coastguard Worker htmlParseDocument(ctxt);
5904*7c568831SAndroid Build Coastguard Worker
5905*7c568831SAndroid Build Coastguard Worker if (ctxt->errNo != XML_ERR_NO_MEMORY) {
5906*7c568831SAndroid Build Coastguard Worker ret = ctxt->myDoc;
5907*7c568831SAndroid Build Coastguard Worker } else {
5908*7c568831SAndroid Build Coastguard Worker ret = NULL;
5909*7c568831SAndroid Build Coastguard Worker xmlFreeDoc(ctxt->myDoc);
5910*7c568831SAndroid Build Coastguard Worker }
5911*7c568831SAndroid Build Coastguard Worker ctxt->myDoc = NULL;
5912*7c568831SAndroid Build Coastguard Worker
5913*7c568831SAndroid Build Coastguard Worker /* assert(ctxt->inputNr == 1); */
5914*7c568831SAndroid Build Coastguard Worker while (ctxt->inputNr > 0)
5915*7c568831SAndroid Build Coastguard Worker xmlFreeInputStream(inputPop(ctxt));
5916*7c568831SAndroid Build Coastguard Worker
5917*7c568831SAndroid Build Coastguard Worker return(ret);
5918*7c568831SAndroid Build Coastguard Worker }
5919*7c568831SAndroid Build Coastguard Worker
5920*7c568831SAndroid Build Coastguard Worker /**
5921*7c568831SAndroid Build Coastguard Worker * htmlReadDoc:
5922*7c568831SAndroid Build Coastguard Worker * @str: a pointer to a zero terminated string
5923*7c568831SAndroid Build Coastguard Worker * @url: only used for error reporting (optoinal)
5924*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding (optional)
5925*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOptions
5926*7c568831SAndroid Build Coastguard Worker *
5927*7c568831SAndroid Build Coastguard Worker * Convenience function to parse an HTML document from a zero-terminated
5928*7c568831SAndroid Build Coastguard Worker * string.
5929*7c568831SAndroid Build Coastguard Worker *
5930*7c568831SAndroid Build Coastguard Worker * See htmlCtxtReadDoc for details.
5931*7c568831SAndroid Build Coastguard Worker *
5932*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree.
5933*7c568831SAndroid Build Coastguard Worker */
5934*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlReadDoc(const xmlChar * str,const char * url,const char * encoding,int options)5935*7c568831SAndroid Build Coastguard Worker htmlReadDoc(const xmlChar *str, const char *url, const char *encoding,
5936*7c568831SAndroid Build Coastguard Worker int options)
5937*7c568831SAndroid Build Coastguard Worker {
5938*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr ctxt;
5939*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
5940*7c568831SAndroid Build Coastguard Worker htmlDocPtr doc;
5941*7c568831SAndroid Build Coastguard Worker
5942*7c568831SAndroid Build Coastguard Worker ctxt = htmlNewParserCtxt();
5943*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
5944*7c568831SAndroid Build Coastguard Worker return(NULL);
5945*7c568831SAndroid Build Coastguard Worker
5946*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
5947*7c568831SAndroid Build Coastguard Worker
5948*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromString(ctxt, url, (const char *) str, encoding,
5949*7c568831SAndroid Build Coastguard Worker XML_INPUT_BUF_STATIC);
5950*7c568831SAndroid Build Coastguard Worker
5951*7c568831SAndroid Build Coastguard Worker doc = htmlCtxtParseDocument(ctxt, input);
5952*7c568831SAndroid Build Coastguard Worker
5953*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(ctxt);
5954*7c568831SAndroid Build Coastguard Worker return(doc);
5955*7c568831SAndroid Build Coastguard Worker }
5956*7c568831SAndroid Build Coastguard Worker
5957*7c568831SAndroid Build Coastguard Worker /**
5958*7c568831SAndroid Build Coastguard Worker * htmlReadFile:
5959*7c568831SAndroid Build Coastguard Worker * @filename: a file or URL
5960*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding (optional)
5961*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOptions
5962*7c568831SAndroid Build Coastguard Worker *
5963*7c568831SAndroid Build Coastguard Worker * Convenience function to parse an HTML file from the filesystem,
5964*7c568831SAndroid Build Coastguard Worker * the network or a global user-defined resource loader.
5965*7c568831SAndroid Build Coastguard Worker *
5966*7c568831SAndroid Build Coastguard Worker * See htmlCtxtReadFile for details.
5967*7c568831SAndroid Build Coastguard Worker *
5968*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree.
5969*7c568831SAndroid Build Coastguard Worker */
5970*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlReadFile(const char * filename,const char * encoding,int options)5971*7c568831SAndroid Build Coastguard Worker htmlReadFile(const char *filename, const char *encoding, int options)
5972*7c568831SAndroid Build Coastguard Worker {
5973*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr ctxt;
5974*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
5975*7c568831SAndroid Build Coastguard Worker htmlDocPtr doc;
5976*7c568831SAndroid Build Coastguard Worker
5977*7c568831SAndroid Build Coastguard Worker ctxt = htmlNewParserCtxt();
5978*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
5979*7c568831SAndroid Build Coastguard Worker return(NULL);
5980*7c568831SAndroid Build Coastguard Worker
5981*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
5982*7c568831SAndroid Build Coastguard Worker
5983*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
5984*7c568831SAndroid Build Coastguard Worker
5985*7c568831SAndroid Build Coastguard Worker doc = htmlCtxtParseDocument(ctxt, input);
5986*7c568831SAndroid Build Coastguard Worker
5987*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(ctxt);
5988*7c568831SAndroid Build Coastguard Worker return(doc);
5989*7c568831SAndroid Build Coastguard Worker }
5990*7c568831SAndroid Build Coastguard Worker
5991*7c568831SAndroid Build Coastguard Worker /**
5992*7c568831SAndroid Build Coastguard Worker * htmlReadMemory:
5993*7c568831SAndroid Build Coastguard Worker * @buffer: a pointer to a char array
5994*7c568831SAndroid Build Coastguard Worker * @size: the size of the array
5995*7c568831SAndroid Build Coastguard Worker * @url: only used for error reporting (optional)
5996*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding, or NULL
5997*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOption(s)
5998*7c568831SAndroid Build Coastguard Worker *
5999*7c568831SAndroid Build Coastguard Worker * Convenience function to parse an HTML document from memory.
6000*7c568831SAndroid Build Coastguard Worker * The input buffer must not contain any terminating null bytes.
6001*7c568831SAndroid Build Coastguard Worker *
6002*7c568831SAndroid Build Coastguard Worker * See htmlCtxtReadMemory for details.
6003*7c568831SAndroid Build Coastguard Worker *
6004*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
6005*7c568831SAndroid Build Coastguard Worker */
6006*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlReadMemory(const char * buffer,int size,const char * url,const char * encoding,int options)6007*7c568831SAndroid Build Coastguard Worker htmlReadMemory(const char *buffer, int size, const char *url,
6008*7c568831SAndroid Build Coastguard Worker const char *encoding, int options)
6009*7c568831SAndroid Build Coastguard Worker {
6010*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr ctxt;
6011*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
6012*7c568831SAndroid Build Coastguard Worker htmlDocPtr doc;
6013*7c568831SAndroid Build Coastguard Worker
6014*7c568831SAndroid Build Coastguard Worker if (size < 0)
6015*7c568831SAndroid Build Coastguard Worker return(NULL);
6016*7c568831SAndroid Build Coastguard Worker
6017*7c568831SAndroid Build Coastguard Worker ctxt = htmlNewParserCtxt();
6018*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
6019*7c568831SAndroid Build Coastguard Worker return(NULL);
6020*7c568831SAndroid Build Coastguard Worker
6021*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
6022*7c568831SAndroid Build Coastguard Worker
6023*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
6024*7c568831SAndroid Build Coastguard Worker XML_INPUT_BUF_STATIC);
6025*7c568831SAndroid Build Coastguard Worker
6026*7c568831SAndroid Build Coastguard Worker doc = htmlCtxtParseDocument(ctxt, input);
6027*7c568831SAndroid Build Coastguard Worker
6028*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(ctxt);
6029*7c568831SAndroid Build Coastguard Worker return(doc);
6030*7c568831SAndroid Build Coastguard Worker }
6031*7c568831SAndroid Build Coastguard Worker
6032*7c568831SAndroid Build Coastguard Worker /**
6033*7c568831SAndroid Build Coastguard Worker * htmlReadFd:
6034*7c568831SAndroid Build Coastguard Worker * @fd: an open file descriptor
6035*7c568831SAndroid Build Coastguard Worker * @url: only used for error reporting (optional)
6036*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding, or NULL
6037*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOptions
6038*7c568831SAndroid Build Coastguard Worker *
6039*7c568831SAndroid Build Coastguard Worker * Convenience function to parse an HTML document from a
6040*7c568831SAndroid Build Coastguard Worker * file descriptor.
6041*7c568831SAndroid Build Coastguard Worker *
6042*7c568831SAndroid Build Coastguard Worker * NOTE that the file descriptor will not be closed when the
6043*7c568831SAndroid Build Coastguard Worker * context is freed or reset.
6044*7c568831SAndroid Build Coastguard Worker *
6045*7c568831SAndroid Build Coastguard Worker * See htmlCtxtReadFd for details.
6046*7c568831SAndroid Build Coastguard Worker *
6047*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
6048*7c568831SAndroid Build Coastguard Worker */
6049*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlReadFd(int fd,const char * url,const char * encoding,int options)6050*7c568831SAndroid Build Coastguard Worker htmlReadFd(int fd, const char *url, const char *encoding, int options)
6051*7c568831SAndroid Build Coastguard Worker {
6052*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr ctxt;
6053*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
6054*7c568831SAndroid Build Coastguard Worker htmlDocPtr doc;
6055*7c568831SAndroid Build Coastguard Worker
6056*7c568831SAndroid Build Coastguard Worker ctxt = htmlNewParserCtxt();
6057*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
6058*7c568831SAndroid Build Coastguard Worker return(NULL);
6059*7c568831SAndroid Build Coastguard Worker
6060*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
6061*7c568831SAndroid Build Coastguard Worker
6062*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromFd(ctxt, url, fd, encoding, 0);
6063*7c568831SAndroid Build Coastguard Worker
6064*7c568831SAndroid Build Coastguard Worker doc = htmlCtxtParseDocument(ctxt, input);
6065*7c568831SAndroid Build Coastguard Worker
6066*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(ctxt);
6067*7c568831SAndroid Build Coastguard Worker return(doc);
6068*7c568831SAndroid Build Coastguard Worker }
6069*7c568831SAndroid Build Coastguard Worker
6070*7c568831SAndroid Build Coastguard Worker /**
6071*7c568831SAndroid Build Coastguard Worker * htmlReadIO:
6072*7c568831SAndroid Build Coastguard Worker * @ioread: an I/O read function
6073*7c568831SAndroid Build Coastguard Worker * @ioclose: an I/O close function (optional)
6074*7c568831SAndroid Build Coastguard Worker * @ioctx: an I/O handler
6075*7c568831SAndroid Build Coastguard Worker * @url: only used for error reporting (optional)
6076*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding (optional)
6077*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOption(s)
6078*7c568831SAndroid Build Coastguard Worker *
6079*7c568831SAndroid Build Coastguard Worker * Convenience function to parse an HTML document from I/O functions
6080*7c568831SAndroid Build Coastguard Worker * and context.
6081*7c568831SAndroid Build Coastguard Worker *
6082*7c568831SAndroid Build Coastguard Worker * See htmlCtxtReadIO for details.
6083*7c568831SAndroid Build Coastguard Worker *
6084*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
6085*7c568831SAndroid Build Coastguard Worker */
6086*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * url,const char * encoding,int options)6087*7c568831SAndroid Build Coastguard Worker htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
6088*7c568831SAndroid Build Coastguard Worker void *ioctx, const char *url, const char *encoding, int options)
6089*7c568831SAndroid Build Coastguard Worker {
6090*7c568831SAndroid Build Coastguard Worker htmlParserCtxtPtr ctxt;
6091*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
6092*7c568831SAndroid Build Coastguard Worker htmlDocPtr doc;
6093*7c568831SAndroid Build Coastguard Worker
6094*7c568831SAndroid Build Coastguard Worker ctxt = htmlNewParserCtxt();
6095*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
6096*7c568831SAndroid Build Coastguard Worker return (NULL);
6097*7c568831SAndroid Build Coastguard Worker
6098*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
6099*7c568831SAndroid Build Coastguard Worker
6100*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromIO(ctxt, url, ioread, ioclose, ioctx,
6101*7c568831SAndroid Build Coastguard Worker encoding, 0);
6102*7c568831SAndroid Build Coastguard Worker
6103*7c568831SAndroid Build Coastguard Worker doc = htmlCtxtParseDocument(ctxt, input);
6104*7c568831SAndroid Build Coastguard Worker
6105*7c568831SAndroid Build Coastguard Worker htmlFreeParserCtxt(ctxt);
6106*7c568831SAndroid Build Coastguard Worker return(doc);
6107*7c568831SAndroid Build Coastguard Worker }
6108*7c568831SAndroid Build Coastguard Worker
6109*7c568831SAndroid Build Coastguard Worker /**
6110*7c568831SAndroid Build Coastguard Worker * htmlCtxtReadDoc:
6111*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
6112*7c568831SAndroid Build Coastguard Worker * @str: a pointer to a zero terminated string
6113*7c568831SAndroid Build Coastguard Worker * @URL: only used for error reporting (optional)
6114*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding (optional)
6115*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOptions
6116*7c568831SAndroid Build Coastguard Worker *
6117*7c568831SAndroid Build Coastguard Worker * Parse an HTML in-memory document and build a tree.
6118*7c568831SAndroid Build Coastguard Worker *
6119*7c568831SAndroid Build Coastguard Worker * See htmlCtxtUseOptions for details.
6120*7c568831SAndroid Build Coastguard Worker *
6121*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
6122*7c568831SAndroid Build Coastguard Worker */
6123*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlCtxtReadDoc(htmlParserCtxtPtr ctxt,const xmlChar * str,const char * URL,const char * encoding,int options)6124*7c568831SAndroid Build Coastguard Worker htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar *str,
6125*7c568831SAndroid Build Coastguard Worker const char *URL, const char *encoding, int options)
6126*7c568831SAndroid Build Coastguard Worker {
6127*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
6128*7c568831SAndroid Build Coastguard Worker
6129*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
6130*7c568831SAndroid Build Coastguard Worker return (NULL);
6131*7c568831SAndroid Build Coastguard Worker
6132*7c568831SAndroid Build Coastguard Worker htmlCtxtReset(ctxt);
6133*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
6134*7c568831SAndroid Build Coastguard Worker
6135*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str,
6136*7c568831SAndroid Build Coastguard Worker encoding, 0);
6137*7c568831SAndroid Build Coastguard Worker
6138*7c568831SAndroid Build Coastguard Worker return(htmlCtxtParseDocument(ctxt, input));
6139*7c568831SAndroid Build Coastguard Worker }
6140*7c568831SAndroid Build Coastguard Worker
6141*7c568831SAndroid Build Coastguard Worker /**
6142*7c568831SAndroid Build Coastguard Worker * htmlCtxtReadFile:
6143*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
6144*7c568831SAndroid Build Coastguard Worker * @filename: a file or URL
6145*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding (optional)
6146*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOptions
6147*7c568831SAndroid Build Coastguard Worker *
6148*7c568831SAndroid Build Coastguard Worker * Parse an HTML file from the filesystem, the network or a
6149*7c568831SAndroid Build Coastguard Worker * user-defined resource loader.
6150*7c568831SAndroid Build Coastguard Worker *
6151*7c568831SAndroid Build Coastguard Worker * See htmlCtxtUseOptions for details.
6152*7c568831SAndroid Build Coastguard Worker *
6153*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
6154*7c568831SAndroid Build Coastguard Worker */
6155*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlCtxtReadFile(htmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)6156*7c568831SAndroid Build Coastguard Worker htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
6157*7c568831SAndroid Build Coastguard Worker const char *encoding, int options)
6158*7c568831SAndroid Build Coastguard Worker {
6159*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
6160*7c568831SAndroid Build Coastguard Worker
6161*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
6162*7c568831SAndroid Build Coastguard Worker return (NULL);
6163*7c568831SAndroid Build Coastguard Worker
6164*7c568831SAndroid Build Coastguard Worker htmlCtxtReset(ctxt);
6165*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
6166*7c568831SAndroid Build Coastguard Worker
6167*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
6168*7c568831SAndroid Build Coastguard Worker
6169*7c568831SAndroid Build Coastguard Worker return(htmlCtxtParseDocument(ctxt, input));
6170*7c568831SAndroid Build Coastguard Worker }
6171*7c568831SAndroid Build Coastguard Worker
6172*7c568831SAndroid Build Coastguard Worker /**
6173*7c568831SAndroid Build Coastguard Worker * htmlCtxtReadMemory:
6174*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
6175*7c568831SAndroid Build Coastguard Worker * @buffer: a pointer to a char array
6176*7c568831SAndroid Build Coastguard Worker * @size: the size of the array
6177*7c568831SAndroid Build Coastguard Worker * @URL: only used for error reporting (optional)
6178*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding (optinal)
6179*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOptions
6180*7c568831SAndroid Build Coastguard Worker *
6181*7c568831SAndroid Build Coastguard Worker * Parse an HTML in-memory document and build a tree. The input buffer must
6182*7c568831SAndroid Build Coastguard Worker * not contain any terminating null bytes.
6183*7c568831SAndroid Build Coastguard Worker *
6184*7c568831SAndroid Build Coastguard Worker * See htmlCtxtUseOptions for details.
6185*7c568831SAndroid Build Coastguard Worker *
6186*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
6187*7c568831SAndroid Build Coastguard Worker */
6188*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlCtxtReadMemory(htmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)6189*7c568831SAndroid Build Coastguard Worker htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
6190*7c568831SAndroid Build Coastguard Worker const char *URL, const char *encoding, int options)
6191*7c568831SAndroid Build Coastguard Worker {
6192*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
6193*7c568831SAndroid Build Coastguard Worker
6194*7c568831SAndroid Build Coastguard Worker if ((ctxt == NULL) || (size < 0))
6195*7c568831SAndroid Build Coastguard Worker return (NULL);
6196*7c568831SAndroid Build Coastguard Worker
6197*7c568831SAndroid Build Coastguard Worker htmlCtxtReset(ctxt);
6198*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
6199*7c568831SAndroid Build Coastguard Worker
6200*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
6201*7c568831SAndroid Build Coastguard Worker XML_INPUT_BUF_STATIC);
6202*7c568831SAndroid Build Coastguard Worker
6203*7c568831SAndroid Build Coastguard Worker return(htmlCtxtParseDocument(ctxt, input));
6204*7c568831SAndroid Build Coastguard Worker }
6205*7c568831SAndroid Build Coastguard Worker
6206*7c568831SAndroid Build Coastguard Worker /**
6207*7c568831SAndroid Build Coastguard Worker * htmlCtxtReadFd:
6208*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
6209*7c568831SAndroid Build Coastguard Worker * @fd: an open file descriptor
6210*7c568831SAndroid Build Coastguard Worker * @URL: only used for error reporting (optional)
6211*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding (optinal)
6212*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOptions
6213*7c568831SAndroid Build Coastguard Worker *
6214*7c568831SAndroid Build Coastguard Worker * Parse an HTML from a file descriptor and build a tree.
6215*7c568831SAndroid Build Coastguard Worker *
6216*7c568831SAndroid Build Coastguard Worker * See htmlCtxtUseOptions for details.
6217*7c568831SAndroid Build Coastguard Worker *
6218*7c568831SAndroid Build Coastguard Worker * NOTE that the file descriptor will not be closed when the
6219*7c568831SAndroid Build Coastguard Worker * context is freed or reset.
6220*7c568831SAndroid Build Coastguard Worker *
6221*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
6222*7c568831SAndroid Build Coastguard Worker */
6223*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlCtxtReadFd(htmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)6224*7c568831SAndroid Build Coastguard Worker htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
6225*7c568831SAndroid Build Coastguard Worker const char *URL, const char *encoding, int options)
6226*7c568831SAndroid Build Coastguard Worker {
6227*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
6228*7c568831SAndroid Build Coastguard Worker
6229*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
6230*7c568831SAndroid Build Coastguard Worker return(NULL);
6231*7c568831SAndroid Build Coastguard Worker
6232*7c568831SAndroid Build Coastguard Worker htmlCtxtReset(ctxt);
6233*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
6234*7c568831SAndroid Build Coastguard Worker
6235*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
6236*7c568831SAndroid Build Coastguard Worker
6237*7c568831SAndroid Build Coastguard Worker return(htmlCtxtParseDocument(ctxt, input));
6238*7c568831SAndroid Build Coastguard Worker }
6239*7c568831SAndroid Build Coastguard Worker
6240*7c568831SAndroid Build Coastguard Worker /**
6241*7c568831SAndroid Build Coastguard Worker * htmlCtxtReadIO:
6242*7c568831SAndroid Build Coastguard Worker * @ctxt: an HTML parser context
6243*7c568831SAndroid Build Coastguard Worker * @ioread: an I/O read function
6244*7c568831SAndroid Build Coastguard Worker * @ioclose: an I/O close function
6245*7c568831SAndroid Build Coastguard Worker * @ioctx: an I/O handler
6246*7c568831SAndroid Build Coastguard Worker * @URL: the base URL to use for the document
6247*7c568831SAndroid Build Coastguard Worker * @encoding: the document encoding, or NULL
6248*7c568831SAndroid Build Coastguard Worker * @options: a combination of htmlParserOption(s)
6249*7c568831SAndroid Build Coastguard Worker *
6250*7c568831SAndroid Build Coastguard Worker * Parse an HTML document from I/O functions and source and build a tree.
6251*7c568831SAndroid Build Coastguard Worker *
6252*7c568831SAndroid Build Coastguard Worker * See htmlCtxtUseOptions for details.
6253*7c568831SAndroid Build Coastguard Worker *
6254*7c568831SAndroid Build Coastguard Worker * Returns the resulting document tree
6255*7c568831SAndroid Build Coastguard Worker */
6256*7c568831SAndroid Build Coastguard Worker htmlDocPtr
htmlCtxtReadIO(htmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)6257*7c568831SAndroid Build Coastguard Worker htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
6258*7c568831SAndroid Build Coastguard Worker xmlInputCloseCallback ioclose, void *ioctx,
6259*7c568831SAndroid Build Coastguard Worker const char *URL,
6260*7c568831SAndroid Build Coastguard Worker const char *encoding, int options)
6261*7c568831SAndroid Build Coastguard Worker {
6262*7c568831SAndroid Build Coastguard Worker xmlParserInputPtr input;
6263*7c568831SAndroid Build Coastguard Worker
6264*7c568831SAndroid Build Coastguard Worker if (ctxt == NULL)
6265*7c568831SAndroid Build Coastguard Worker return (NULL);
6266*7c568831SAndroid Build Coastguard Worker
6267*7c568831SAndroid Build Coastguard Worker htmlCtxtReset(ctxt);
6268*7c568831SAndroid Build Coastguard Worker htmlCtxtUseOptions(ctxt, options);
6269*7c568831SAndroid Build Coastguard Worker
6270*7c568831SAndroid Build Coastguard Worker input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
6271*7c568831SAndroid Build Coastguard Worker encoding, 0);
6272*7c568831SAndroid Build Coastguard Worker
6273*7c568831SAndroid Build Coastguard Worker return(htmlCtxtParseDocument(ctxt, input));
6274*7c568831SAndroid Build Coastguard Worker }
6275*7c568831SAndroid Build Coastguard Worker
6276*7c568831SAndroid Build Coastguard Worker #endif /* LIBXML_HTML_ENABLED */
6277