1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * [email protected]
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/parser.h>
55 #include <libxml/xmlmemory.h>
56 #include <libxml/tree.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #include <libxml/SAX2.h>
65 #ifdef LIBXML_CATALOG_ENABLED
66 #include <libxml/catalog.h>
67 #endif
68
69 #include "private/buf.h"
70 #include "private/dict.h"
71 #include "private/entities.h"
72 #include "private/error.h"
73 #include "private/html.h"
74 #include "private/io.h"
75 #include "private/parser.h"
76
/*
 * Reserved namespace index values, taken from the top of the int range.
 */
#define NS_INDEX_EMPTY  INT_MAX
#define NS_INDEX_XML    (INT_MAX - 1)

/*
 * Fixed hash constants.
 * NOTE(review): presumably the hash values associated with the empty and
 * the XML namespace URI -- confirm against the code that uses them.
 */
#define URI_HASH_EMPTY  0xD943A04E
#define URI_HASH_XML    0xF0451F02
81
82 struct _xmlStartTag {
83 const xmlChar *prefix;
84 const xmlChar *URI;
85 int line;
86 int nsNr;
87 };
88
/*
 * Additional per-namespace bookkeeping: an opaque SAX data pointer, the
 * hash values of prefix and URI, an element id and a previous index.
 * NOTE(review): field meanings inferred from their names only -- confirm.
 */
typedef struct {
    void *saxData;
    unsigned int prefixHashValue;
    unsigned int uriHashValue;
    unsigned int elementId;
    int oldIndex;
} xmlParserNsExtra;
96
/*
 * One slot of the namespace hash table: a hash value paired with an index.
 */
typedef struct {
    unsigned int hashValue;
    int index;
} xmlParserNsBucket;
101
102 struct _xmlParserNsData {
103 xmlParserNsExtra *extra;
104
105 unsigned hashSize;
106 unsigned hashElems;
107 xmlParserNsBucket *hash;
108
109 unsigned elementId;
110 int defaultNsIndex;
111 int minNsIndex;
112 };
113
/*
 * One slot of the attribute hash table; it stores only an index.
 */
struct _xmlAttrHashBucket {
    int index;
};
117
118 static int
119 xmlParseElementStart(xmlParserCtxtPtr ctxt);
120
121 static void
122 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123
124 static xmlEntityPtr
125 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126
127 static const xmlChar *
128 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129
130 /************************************************************************
131 * *
132 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
133 * *
134 ************************************************************************/
135
#define XML_PARSER_BIG_ENTITY           1000
#define XML_PARSER_LOT_ENTITY           5000

/*
 * Constants guarding against abusive entity expansion
 * ("billion laughs").
 */

/*
 * Amount of entity expansion that is always permitted, regardless of
 * the amplification factor.
 */
#define XML_PARSER_ALLOWED_EXPANSION    1000000

/*
 * Fixed cost charged for every entity reference. This is a crude model of
 * processing time and protects, for example, against exponential
 * expansion of empty or very short entities.
 */
#define XML_ENT_FIXED_COST              20
155
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents we accept. It is a safety
 * boundary rather than a limitation of the parser and can be disabled
 * with the XML_PARSE_HUGE parser option.
 */
const unsigned int xmlParserMaxDepth = 256u;
165
166
167
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100
#define SAX_COMPAT_MODE             BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * Minimal amount of data the parser expects to have received when
 * calling GROW. It is not a hard limit but an optimization when reading
 * strings like Names. It is not strictly needed as long as the input's
 * available characters are followed by 0, which should be provided by
 * the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE       100
182
183 /**
184 * xmlParserVersion:
185 *
186 * Constant string describing the internal version of the library
187 */
188 const char *const
189 xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specifications.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model", NULL
};
200
201
202 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204 const xmlChar **str);
205
206 static void
207 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208
209 static int
210 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211
212 /************************************************************************
213 * *
214 * Some factorized error routines *
215 * *
216 ************************************************************************/
217
218 static void
xmlErrMemory(xmlParserCtxtPtr ctxt)219 xmlErrMemory(xmlParserCtxtPtr ctxt) {
220 xmlCtxtErrMemory(ctxt);
221 }
222
223 /**
224 * xmlErrAttributeDup:
225 * @ctxt: an XML parser context
226 * @prefix: the attribute prefix
227 * @localname: the attribute localname
228 *
229 * Handle a redefinition of attribute error
230 */
231 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)232 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233 const xmlChar * localname)
234 {
235 if (prefix == NULL)
236 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237 XML_ERR_FATAL, localname, NULL, NULL, 0,
238 "Attribute %s redefined\n", localname);
239 else
240 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241 XML_ERR_FATAL, prefix, localname, NULL, 0,
242 "Attribute %s:%s redefined\n", prefix, localname);
243 }
244
245 /**
246 * xmlFatalErrMsg:
247 * @ctxt: an XML parser context
248 * @error: the error number
249 * @msg: the error message
250 *
251 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
252 */
253 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)254 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255 const char *msg)
256 {
257 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258 NULL, NULL, NULL, 0, "%s", msg);
259 }
260
261 /**
262 * xmlWarningMsg:
263 * @ctxt: an XML parser context
264 * @error: the error number
265 * @msg: the error message
266 * @str1: extra data
267 * @str2: extra data
268 *
269 * Handle a warning.
270 */
271 void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)272 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273 const char *msg, const xmlChar *str1, const xmlChar *str2)
274 {
275 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276 str1, str2, NULL, 0, msg, str1, str2);
277 }
278
279 /**
280 * xmlValidityError:
281 * @ctxt: an XML parser context
282 * @error: the error number
283 * @msg: the error message
284 * @str1: extra data
285 *
286 * Handle a validity error.
287 */
288 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)289 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290 const char *msg, const xmlChar *str1, const xmlChar *str2)
291 {
292 ctxt->valid = 0;
293
294 xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295 str1, str2, NULL, 0, msg, str1, str2);
296 }
297
298 /**
299 * xmlFatalErrMsgInt:
300 * @ctxt: an XML parser context
301 * @error: the error number
302 * @msg: the error message
303 * @val: an integer value
304 *
305 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
306 */
307 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)308 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309 const char *msg, int val)
310 {
311 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312 NULL, NULL, NULL, val, msg, val);
313 }
314
315 /**
316 * xmlFatalErrMsgStrIntStr:
317 * @ctxt: an XML parser context
318 * @error: the error number
319 * @msg: the error message
320 * @str1: an string info
321 * @val: an integer value
322 * @str2: an string info
323 *
324 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
325 */
326 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)327 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328 const char *msg, const xmlChar *str1, int val,
329 const xmlChar *str2)
330 {
331 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332 str1, str2, NULL, val, msg, str1, val, str2);
333 }
334
335 /**
336 * xmlFatalErrMsgStr:
337 * @ctxt: an XML parser context
338 * @error: the error number
339 * @msg: the error message
340 * @val: a string value
341 *
342 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
343 */
344 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)345 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346 const char *msg, const xmlChar * val)
347 {
348 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349 val, NULL, NULL, 0, msg, val);
350 }
351
352 /**
353 * xmlErrMsgStr:
354 * @ctxt: an XML parser context
355 * @error: the error number
356 * @msg: the error message
357 * @val: a string value
358 *
359 * Handle a non fatal parser error
360 */
361 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)362 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363 const char *msg, const xmlChar * val)
364 {
365 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366 val, NULL, NULL, 0, msg, val);
367 }
368
369 /**
370 * xmlNsErr:
371 * @ctxt: an XML parser context
372 * @error: the error number
373 * @msg: the message
374 * @info1: extra information string
375 * @info2: extra information string
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)380 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg,
382 const xmlChar * info1, const xmlChar * info2,
383 const xmlChar * info3)
384 {
385 ctxt->nsWellFormed = 0;
386
387 xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388 info1, info2, info3, 0, msg, info1, info2, info3);
389 }
390
391 /**
392 * xmlNsWarn
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the message
396 * @info1: extra information string
397 * @info2: extra information string
398 *
399 * Handle a namespace warning error
400 */
401 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)402 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg,
404 const xmlChar * info1, const xmlChar * info2,
405 const xmlChar * info3)
406 {
407 xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408 info1, info2, info3, 0, msg, info1, info2, info3);
409 }
410
/*
 * Add val to *dst, clamping the result at ULONG_MAX instead of letting
 * the unsigned sum wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
418
/*
 * Add a size_t quantity to *dst, clamping the result at ULONG_MAX.
 *
 * The value is taken as size_t rather than unsigned long so that on
 * platforms where size_t is wider than unsigned long (e.g. LLP64) a large
 * value saturates the counter instead of being silently truncated at the
 * call boundary before the overflow test.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is carried out in the wider of the two types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;    /* fits: val <= ULONG_MAX - *dst */
}
426
427 /**
428 * xmlParserEntityCheck:
429 * @ctxt: parser context
430 * @extra: sum of unexpanded entity sizes
431 *
432 * Check for non-linear entity expansion behaviour.
433 *
434 * In some cases like xmlExpandEntityInAttValue, this function is called
435 * for each, possibly nested entity and its unexpanded content length.
436 *
437 * In other cases like xmlParseReference, it's only called for each
438 * top-level entity with its unexpanded content length plus the sum of
439 * the unexpanded content lengths (plus fixed cost) of all nested
440 * entities.
441 *
442 * Summing the unexpanded lengths also adds the length of the reference.
443 * This is by design. Taking the length of the entity name into account
444 * discourages attacks that try to waste CPU time with abusively long
445 * entity names. See test/recurse/lol6.xml for example. Each call also
446 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447 * short entities.
448 *
449 * Returns 1 on error, 0 on success.
450 */
451 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long extra)452 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453 {
454 unsigned long consumed;
455 unsigned long *expandedSize;
456 xmlParserInputPtr input = ctxt->input;
457 xmlEntityPtr entity = input->entity;
458
459 if ((entity) && (entity->flags & XML_ENT_CHECKED))
460 return(0);
461
462 /*
463 * Compute total consumed bytes so far, including input streams of
464 * external entities.
465 */
466 consumed = input->consumed;
467 xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468 xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469
470 if (entity)
471 expandedSize = &entity->expandedSize;
472 else
473 expandedSize = &ctxt->sizeentcopy;
474
475 /*
476 * Add extra cost and some fixed cost.
477 */
478 xmlSaturatedAdd(expandedSize, extra);
479 xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480
481 /*
482 * It's important to always use saturation arithmetic when tracking
483 * entity sizes to make the size checks reliable. If "sizeentcopy"
484 * overflows, we have to abort.
485 */
486 if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487 ((*expandedSize >= ULONG_MAX) ||
488 (*expandedSize / ctxt->maxAmpl > consumed))) {
489 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490 "Maximum entity amplification factor exceeded, see "
491 "xmlCtxtSetMaxAmplification.\n");
492 xmlHaltParser(ctxt);
493 return(1);
494 }
495
496 return(0);
497 }
498
499 /************************************************************************
500 * *
501 * Library wide options *
502 * *
503 ************************************************************************/
504
505 /**
506 * xmlHasFeature:
507 * @feature: the feature to be examined
508 *
509 * Examines if the library has been compiled with a given feature.
510 *
511 * Returns a non-zero value if the feature exist, otherwise zero.
512 * Returns zero (0) if the feature does not exist or an unknown
513 * unknown feature is requested, non-zero otherwise.
514 */
515 int
xmlHasFeature(xmlFeature feature)516 xmlHasFeature(xmlFeature feature)
517 {
518 switch (feature) {
519 case XML_WITH_THREAD:
520 #ifdef LIBXML_THREAD_ENABLED
521 return(1);
522 #else
523 return(0);
524 #endif
525 case XML_WITH_TREE:
526 #ifdef LIBXML_TREE_ENABLED
527 return(1);
528 #else
529 return(0);
530 #endif
531 case XML_WITH_OUTPUT:
532 #ifdef LIBXML_OUTPUT_ENABLED
533 return(1);
534 #else
535 return(0);
536 #endif
537 case XML_WITH_PUSH:
538 #ifdef LIBXML_PUSH_ENABLED
539 return(1);
540 #else
541 return(0);
542 #endif
543 case XML_WITH_READER:
544 #ifdef LIBXML_READER_ENABLED
545 return(1);
546 #else
547 return(0);
548 #endif
549 case XML_WITH_PATTERN:
550 #ifdef LIBXML_PATTERN_ENABLED
551 return(1);
552 #else
553 return(0);
554 #endif
555 case XML_WITH_WRITER:
556 #ifdef LIBXML_WRITER_ENABLED
557 return(1);
558 #else
559 return(0);
560 #endif
561 case XML_WITH_SAX1:
562 #ifdef LIBXML_SAX1_ENABLED
563 return(1);
564 #else
565 return(0);
566 #endif
567 case XML_WITH_FTP:
568 #ifdef LIBXML_FTP_ENABLED
569 return(1);
570 #else
571 return(0);
572 #endif
573 case XML_WITH_HTTP:
574 #ifdef LIBXML_HTTP_ENABLED
575 return(1);
576 #else
577 return(0);
578 #endif
579 case XML_WITH_VALID:
580 #ifdef LIBXML_VALID_ENABLED
581 return(1);
582 #else
583 return(0);
584 #endif
585 case XML_WITH_HTML:
586 #ifdef LIBXML_HTML_ENABLED
587 return(1);
588 #else
589 return(0);
590 #endif
591 case XML_WITH_LEGACY:
592 #ifdef LIBXML_LEGACY_ENABLED
593 return(1);
594 #else
595 return(0);
596 #endif
597 case XML_WITH_C14N:
598 #ifdef LIBXML_C14N_ENABLED
599 return(1);
600 #else
601 return(0);
602 #endif
603 case XML_WITH_CATALOG:
604 #ifdef LIBXML_CATALOG_ENABLED
605 return(1);
606 #else
607 return(0);
608 #endif
609 case XML_WITH_XPATH:
610 #ifdef LIBXML_XPATH_ENABLED
611 return(1);
612 #else
613 return(0);
614 #endif
615 case XML_WITH_XPTR:
616 #ifdef LIBXML_XPTR_ENABLED
617 return(1);
618 #else
619 return(0);
620 #endif
621 case XML_WITH_XINCLUDE:
622 #ifdef LIBXML_XINCLUDE_ENABLED
623 return(1);
624 #else
625 return(0);
626 #endif
627 case XML_WITH_ICONV:
628 #ifdef LIBXML_ICONV_ENABLED
629 return(1);
630 #else
631 return(0);
632 #endif
633 case XML_WITH_ISO8859X:
634 #ifdef LIBXML_ISO8859X_ENABLED
635 return(1);
636 #else
637 return(0);
638 #endif
639 case XML_WITH_UNICODE:
640 #ifdef LIBXML_UNICODE_ENABLED
641 return(1);
642 #else
643 return(0);
644 #endif
645 case XML_WITH_REGEXP:
646 #ifdef LIBXML_REGEXP_ENABLED
647 return(1);
648 #else
649 return(0);
650 #endif
651 case XML_WITH_AUTOMATA:
652 #ifdef LIBXML_AUTOMATA_ENABLED
653 return(1);
654 #else
655 return(0);
656 #endif
657 case XML_WITH_EXPR:
658 #ifdef LIBXML_EXPR_ENABLED
659 return(1);
660 #else
661 return(0);
662 #endif
663 case XML_WITH_SCHEMAS:
664 #ifdef LIBXML_SCHEMAS_ENABLED
665 return(1);
666 #else
667 return(0);
668 #endif
669 case XML_WITH_SCHEMATRON:
670 #ifdef LIBXML_SCHEMATRON_ENABLED
671 return(1);
672 #else
673 return(0);
674 #endif
675 case XML_WITH_MODULES:
676 #ifdef LIBXML_MODULES_ENABLED
677 return(1);
678 #else
679 return(0);
680 #endif
681 case XML_WITH_DEBUG:
682 #ifdef LIBXML_DEBUG_ENABLED
683 return(1);
684 #else
685 return(0);
686 #endif
687 case XML_WITH_DEBUG_MEM:
688 #ifdef DEBUG_MEMORY_LOCATION
689 return(1);
690 #else
691 return(0);
692 #endif
693 case XML_WITH_ZLIB:
694 #ifdef LIBXML_ZLIB_ENABLED
695 return(1);
696 #else
697 return(0);
698 #endif
699 case XML_WITH_LZMA:
700 #ifdef LIBXML_LZMA_ENABLED
701 return(1);
702 #else
703 return(0);
704 #endif
705 case XML_WITH_ICU:
706 #ifdef LIBXML_ICU_ENABLED
707 return(1);
708 #else
709 return(0);
710 #endif
711 default:
712 break;
713 }
714 return(0);
715 }
716
717 /************************************************************************
718 * *
719 * Simple string buffer *
720 * *
721 ************************************************************************/
722
723 typedef struct {
724 xmlChar *mem;
725 unsigned size;
726 unsigned cap; /* size < cap */
727 unsigned max; /* size <= max */
728 xmlParserErrors code;
729 } xmlSBuf;
730
731 static void
xmlSBufInit(xmlSBuf * buf,unsigned max)732 xmlSBufInit(xmlSBuf *buf, unsigned max) {
733 buf->mem = NULL;
734 buf->size = 0;
735 buf->cap = 0;
736 buf->max = max;
737 buf->code = XML_ERR_OK;
738 }
739
740 static int
xmlSBufGrow(xmlSBuf * buf,unsigned len)741 xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742 xmlChar *mem;
743 unsigned cap;
744
745 if (len >= UINT_MAX / 2 - buf->size) {
746 buf->code = XML_ERR_RESOURCE_LIMIT;
747 return(-1);
748 }
749
750 cap = (buf->size + len) * 2;
751 if (cap < 240)
752 cap = 240;
753
754 mem = xmlRealloc(buf->mem, cap);
755 if (mem == NULL) {
756 buf->code = XML_ERR_NO_MEMORY;
757 return(-1);
758 }
759
760 buf->mem = mem;
761 buf->cap = cap;
762
763 return(0);
764 }
765
766 static void
xmlSBufAddString(xmlSBuf * buf,const xmlChar * str,unsigned len)767 xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
768 if (buf->max - buf->size < len) {
769 buf->code = XML_ERR_RESOURCE_LIMIT;
770 return;
771 }
772
773 if (buf->cap - buf->size <= len) {
774 if (xmlSBufGrow(buf, len) < 0)
775 return;
776 }
777
778 if (len > 0)
779 memcpy(buf->mem + buf->size, str, len);
780 buf->size += len;
781 }
782
783 static void
xmlSBufAddCString(xmlSBuf * buf,const char * str,unsigned len)784 xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
785 xmlSBufAddString(buf, (const xmlChar *) str, len);
786 }
787
788 static void
xmlSBufAddChar(xmlSBuf * buf,int c)789 xmlSBufAddChar(xmlSBuf *buf, int c) {
790 xmlChar *end;
791
792 if (buf->max - buf->size < 4) {
793 buf->code = XML_ERR_RESOURCE_LIMIT;
794 return;
795 }
796
797 if (buf->cap - buf->size <= 4) {
798 if (xmlSBufGrow(buf, 4) < 0)
799 return;
800 }
801
802 end = buf->mem + buf->size;
803
804 if (c < 0x80) {
805 *end = (xmlChar) c;
806 buf->size += 1;
807 } else {
808 buf->size += xmlCopyCharMultiByte(end, c);
809 }
810 }
811
812 static void
xmlSBufAddReplChar(xmlSBuf * buf)813 xmlSBufAddReplChar(xmlSBuf *buf) {
814 xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
815 }
816
817 static void
xmlSBufReportError(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)818 xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
819 if (buf->code == XML_ERR_NO_MEMORY)
820 xmlCtxtErrMemory(ctxt);
821 else
822 xmlFatalErr(ctxt, buf->code, errMsg);
823 }
824
825 static xmlChar *
xmlSBufFinish(xmlSBuf * buf,int * sizeOut,xmlParserCtxtPtr ctxt,const char * errMsg)826 xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
827 const char *errMsg) {
828 if (buf->mem == NULL) {
829 buf->mem = xmlMalloc(1);
830 if (buf->mem == NULL) {
831 buf->code = XML_ERR_NO_MEMORY;
832 } else {
833 buf->mem[0] = 0;
834 }
835 } else {
836 buf->mem[buf->size] = 0;
837 }
838
839 if (buf->code == XML_ERR_OK) {
840 if (sizeOut != NULL)
841 *sizeOut = buf->size;
842 return(buf->mem);
843 }
844
845 xmlSBufReportError(buf, ctxt, errMsg);
846
847 xmlFree(buf->mem);
848
849 if (sizeOut != NULL)
850 *sizeOut = 0;
851 return(NULL);
852 }
853
854 static void
xmlSBufCleanup(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)855 xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
856 if (buf->code != XML_ERR_OK)
857 xmlSBufReportError(buf, ctxt, errMsg);
858
859 xmlFree(buf->mem);
860 }
861
862 static int
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * errMsg)863 xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
864 const char *errMsg) {
865 int c = str[0];
866 int c1 = str[1];
867
868 if ((c1 & 0xC0) != 0x80)
869 goto encoding_error;
870
871 if (c < 0xE0) {
872 /* 2-byte sequence */
873 if (c < 0xC2)
874 goto encoding_error;
875
876 return(2);
877 } else {
878 int c2 = str[2];
879
880 if ((c2 & 0xC0) != 0x80)
881 goto encoding_error;
882
883 if (c < 0xF0) {
884 /* 3-byte sequence */
885 if (c == 0xE0) {
886 /* overlong */
887 if (c1 < 0xA0)
888 goto encoding_error;
889 } else if (c == 0xED) {
890 /* surrogate */
891 if (c1 >= 0xA0)
892 goto encoding_error;
893 } else if (c == 0xEF) {
894 /* U+FFFE and U+FFFF are invalid Chars */
895 if ((c1 == 0xBF) && (c2 >= 0xBE))
896 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
897 }
898
899 return(3);
900 } else {
901 /* 4-byte sequence */
902 if ((str[3] & 0xC0) != 0x80)
903 goto encoding_error;
904 if (c == 0xF0) {
905 /* overlong */
906 if (c1 < 0x90)
907 goto encoding_error;
908 } else if (c >= 0xF4) {
909 /* greater than 0x10FFFF */
910 if ((c > 0xF4) || (c1 >= 0x90))
911 goto encoding_error;
912 }
913
914 return(4);
915 }
916 }
917
918 encoding_error:
919 /* Only report the first error */
920 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
921 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
922 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
923 }
924
925 return(0);
926 }
927
928 /************************************************************************
929 * *
930 * SAX2 defaulted attributes handling *
931 * *
932 ************************************************************************/
933
934 /**
935 * xmlCtxtInitializeLate:
936 * @ctxt: an XML parser context
937 *
938 * Final initialization of the parser context before starting to parse.
939 *
940 * This accounts for users modifying struct members of parser context
941 * directly.
942 */
943 static void
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt)944 xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
945 xmlSAXHandlerPtr sax;
946
947 /* Avoid unused variable warning if features are disabled. */
948 (void) sax;
949
950 /*
951 * Changing the SAX struct directly is still widespread practice
952 * in internal and external code.
953 */
954 if (ctxt == NULL) return;
955 sax = ctxt->sax;
956 #ifdef LIBXML_SAX1_ENABLED
957 /*
958 * Only enable SAX2 if there SAX2 element handlers, except when there
959 * are no element handlers at all.
960 */
961 if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
962 (sax) &&
963 (sax->initialized == XML_SAX2_MAGIC) &&
964 ((sax->startElementNs != NULL) ||
965 (sax->endElementNs != NULL) ||
966 ((sax->startElement == NULL) && (sax->endElement == NULL))))
967 ctxt->sax2 = 1;
968 #else
969 ctxt->sax2 = 1;
970 #endif /* LIBXML_SAX1_ENABLED */
971
972 /*
973 * Some users replace the dictionary directly in the context struct.
974 * We really need an API function to do that cleanly.
975 */
976 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
977 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
978 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
979 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
980 (ctxt->str_xml_ns == NULL)) {
981 xmlErrMemory(ctxt);
982 }
983 }
984
985 typedef struct {
986 xmlHashedString prefix;
987 xmlHashedString name;
988 xmlHashedString value;
989 const xmlChar *valueEnd;
990 int external;
991 int expandedSize;
992 } xmlDefAttr;
993
994 typedef struct _xmlDefAttrs xmlDefAttrs;
995 typedef xmlDefAttrs *xmlDefAttrsPtr;
996 struct _xmlDefAttrs {
997 int nbAttrs; /* number of defaulted attributes on that element */
998 int maxAttrs; /* the size of the array */
999 #if __STDC_VERSION__ >= 199901L
1000 /* Using a C99 flexible array member avoids UBSan errors. */
1001 xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1002 #else
1003 xmlDefAttr attrs[1];
1004 #endif
1005 };
1006
1007 /**
1008 * xmlAttrNormalizeSpace:
1009 * @src: the source string
1010 * @dst: the target string
1011 *
1012 * Normalize the space in non CDATA attribute values:
1013 * If the attribute type is not CDATA, then the XML processor MUST further
1014 * process the normalized attribute value by discarding any leading and
1015 * trailing space (#x20) characters, and by replacing sequences of space
1016 * (#x20) characters by a single space (#x20) character.
1017 * Note that the size of dst need to be at least src, and if one doesn't need
1018 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1019 * passing src as dst is just fine.
1020 *
1021 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1022 * is needed.
1023 */
1024 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1025 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1026 {
1027 if ((src == NULL) || (dst == NULL))
1028 return(NULL);
1029
1030 while (*src == 0x20) src++;
1031 while (*src != 0) {
1032 if (*src == 0x20) {
1033 while (*src == 0x20) src++;
1034 if (*src != 0)
1035 *dst++ = 0x20;
1036 } else {
1037 *dst++ = *src++;
1038 }
1039 }
1040 *dst = 0;
1041 if (dst == src)
1042 return(NULL);
1043 return(dst);
1044 }
1045
1046 /**
1047 * xmlAddDefAttrs:
1048 * @ctxt: an XML parser context
1049 * @fullname: the element fullname
1050 * @fullattr: the attribute fullname
1051 * @value: the attribute value
1052 *
1053 * Add a defaulted attribute for an element
1054 */
1055 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1056 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1057 const xmlChar *fullname,
1058 const xmlChar *fullattr,
1059 const xmlChar *value) {
1060 xmlDefAttrsPtr defaults;
1061 xmlDefAttr *attr;
1062 int len, expandedSize;
1063 xmlHashedString name;
1064 xmlHashedString prefix;
1065 xmlHashedString hvalue;
1066 const xmlChar *localname;
1067
1068 /*
1069 * Allows to detect attribute redefinitions
1070 */
1071 if (ctxt->attsSpecial != NULL) {
1072 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1073 return;
1074 }
1075
1076 if (ctxt->attsDefault == NULL) {
1077 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1078 if (ctxt->attsDefault == NULL)
1079 goto mem_error;
1080 }
1081
1082 /*
1083 * split the element name into prefix:localname , the string found
1084 * are within the DTD and then not associated to namespace names.
1085 */
1086 localname = xmlSplitQName3(fullname, &len);
1087 if (localname == NULL) {
1088 name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1089 prefix.name = NULL;
1090 } else {
1091 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1092 prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1093 if (prefix.name == NULL)
1094 goto mem_error;
1095 }
1096 if (name.name == NULL)
1097 goto mem_error;
1098
1099 /*
1100 * make sure there is some storage
1101 */
1102 defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1103 if ((defaults == NULL) ||
1104 (defaults->nbAttrs >= defaults->maxAttrs)) {
1105 xmlDefAttrsPtr temp;
1106 int newSize;
1107
1108 newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1109 temp = xmlRealloc(defaults,
1110 sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111 if (temp == NULL)
1112 goto mem_error;
1113 if (defaults == NULL)
1114 temp->nbAttrs = 0;
1115 temp->maxAttrs = newSize;
1116 defaults = temp;
1117 if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118 defaults, NULL) < 0) {
1119 xmlFree(defaults);
1120 goto mem_error;
1121 }
1122 }
1123
1124 /*
1125 * Split the attribute name into prefix:localname , the string found
1126 * are within the DTD and hen not associated to namespace names.
1127 */
1128 localname = xmlSplitQName3(fullattr, &len);
1129 if (localname == NULL) {
1130 name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131 prefix.name = NULL;
1132 } else {
1133 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134 prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135 if (prefix.name == NULL)
1136 goto mem_error;
1137 }
1138 if (name.name == NULL)
1139 goto mem_error;
1140
1141 /* intern the string and precompute the end */
1142 len = strlen((const char *) value);
1143 hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144 if (hvalue.name == NULL)
1145 goto mem_error;
1146
1147 expandedSize = strlen((const char *) name.name);
1148 if (prefix.name != NULL)
1149 expandedSize += strlen((const char *) prefix.name);
1150 expandedSize += len;
1151
1152 attr = &defaults->attrs[defaults->nbAttrs++];
1153 attr->name = name;
1154 attr->prefix = prefix;
1155 attr->value = hvalue;
1156 attr->valueEnd = hvalue.name + len;
1157 attr->external = PARSER_EXTERNAL(ctxt);
1158 attr->expandedSize = expandedSize;
1159
1160 return;
1161
1162 mem_error:
1163 xmlErrMemory(ctxt);
1164 return;
1165 }
1166
1167 /**
1168 * xmlAddSpecialAttr:
1169 * @ctxt: an XML parser context
1170 * @fullname: the element fullname
1171 * @fullattr: the attribute fullname
1172 * @type: the attribute type
1173 *
1174 * Register this attribute type
1175 */
1176 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1177 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1178 const xmlChar *fullname,
1179 const xmlChar *fullattr,
1180 int type)
1181 {
1182 if (ctxt->attsSpecial == NULL) {
1183 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1184 if (ctxt->attsSpecial == NULL)
1185 goto mem_error;
1186 }
1187
1188 if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1189 (void *) (ptrdiff_t) type) < 0)
1190 goto mem_error;
1191 return;
1192
1193 mem_error:
1194 xmlErrMemory(ctxt);
1195 return;
1196 }
1197
1198 /**
1199 * xmlCleanSpecialAttrCallback:
1200 *
1201 * Removes CDATA attributes from the special attribute table
1202 */
1203 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1204 xmlCleanSpecialAttrCallback(void *payload, void *data,
1205 const xmlChar *fullname, const xmlChar *fullattr,
1206 const xmlChar *unused ATTRIBUTE_UNUSED) {
1207 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1208
1209 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1210 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1211 }
1212 }
1213
1214 /**
1215 * xmlCleanSpecialAttr:
1216 * @ctxt: an XML parser context
1217 *
1218 * Trim the list of attributes defined to remove all those of type
1219 * CDATA as they are not special. This call should be done when finishing
1220 * to parse the DTD and before starting to parse the document root.
1221 */
1222 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1223 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1224 {
1225 if (ctxt->attsSpecial == NULL)
1226 return;
1227
1228 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1229
1230 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1231 xmlHashFree(ctxt->attsSpecial, NULL);
1232 ctxt->attsSpecial = NULL;
1233 }
1234 return;
1235 }
1236
1237 /**
1238 * xmlCheckLanguageID:
1239 * @lang: pointer to the string value
1240 *
1241 * DEPRECATED: Internal function, do not use.
1242 *
1243 * Checks that the value conforms to the LanguageID production:
1244 *
1245 * NOTE: this is somewhat deprecated, those productions were removed from
1246 * the XML Second edition.
1247 *
1248 * [33] LanguageID ::= Langcode ('-' Subcode)*
1249 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1250 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1251 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1252 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1253 * [38] Subcode ::= ([a-z] | [A-Z])+
1254 *
1255 * The current REC reference the successors of RFC 1766, currently 5646
1256 *
1257 * http://www.rfc-editor.org/rfc/rfc5646.txt
1258 * langtag = language
1259 * ["-" script]
1260 * ["-" region]
1261 * *("-" variant)
1262 * *("-" extension)
1263 * ["-" privateuse]
1264 * language = 2*3ALPHA ; shortest ISO 639 code
1265 * ["-" extlang] ; sometimes followed by
1266 * ; extended language subtags
1267 * / 4ALPHA ; or reserved for future use
1268 * / 5*8ALPHA ; or registered language subtag
1269 *
1270 * extlang = 3ALPHA ; selected ISO 639 codes
1271 * *2("-" 3ALPHA) ; permanently reserved
1272 *
1273 * script = 4ALPHA ; ISO 15924 code
1274 *
1275 * region = 2ALPHA ; ISO 3166-1 code
1276 * / 3DIGIT ; UN M.49 code
1277 *
1278 * variant = 5*8alphanum ; registered variants
1279 * / (DIGIT 3alphanum)
1280 *
1281 * extension = singleton 1*("-" (2*8alphanum))
1282 *
1283 * ; Single alphanumerics
1284 * ; "x" reserved for private use
1285 * singleton = DIGIT ; 0 - 9
1286 * / %x41-57 ; A - W
1287 * / %x59-5A ; Y - Z
1288 * / %x61-77 ; a - w
1289 * / %x79-7A ; y - z
1290 *
1291 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1292 * The parser below doesn't try to cope with extension or privateuse
1293 * that could be added but that's not interoperable anyway
1294 *
1295 * Returns 1 if correct 0 otherwise
1296 **/
1297 int
xmlCheckLanguageID(const xmlChar * lang)1298 xmlCheckLanguageID(const xmlChar * lang)
1299 {
1300 const xmlChar *cur = lang, *nxt;
1301
1302 if (cur == NULL)
1303 return (0);
1304 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1305 ((cur[0] == 'I') && (cur[1] == '-')) ||
1306 ((cur[0] == 'x') && (cur[1] == '-')) ||
1307 ((cur[0] == 'X') && (cur[1] == '-'))) {
1308 /*
1309 * Still allow IANA code and user code which were coming
1310 * from the previous version of the XML-1.0 specification
1311 * it's deprecated but we should not fail
1312 */
1313 cur += 2;
1314 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1316 cur++;
1317 return(cur[0] == 0);
1318 }
1319 nxt = cur;
1320 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1321 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1322 nxt++;
1323 if (nxt - cur >= 4) {
1324 /*
1325 * Reserved
1326 */
1327 if ((nxt - cur > 8) || (nxt[0] != 0))
1328 return(0);
1329 return(1);
1330 }
1331 if (nxt - cur < 2)
1332 return(0);
1333 /* we got an ISO 639 code */
1334 if (nxt[0] == 0)
1335 return(1);
1336 if (nxt[0] != '-')
1337 return(0);
1338
1339 nxt++;
1340 cur = nxt;
1341 /* now we can have extlang or script or region or variant */
1342 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1343 goto region_m49;
1344
1345 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1346 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1347 nxt++;
1348 if (nxt - cur == 4)
1349 goto script;
1350 if (nxt - cur == 2)
1351 goto region;
1352 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1353 goto variant;
1354 if (nxt - cur != 3)
1355 return(0);
1356 /* we parsed an extlang */
1357 if (nxt[0] == 0)
1358 return(1);
1359 if (nxt[0] != '-')
1360 return(0);
1361
1362 nxt++;
1363 cur = nxt;
1364 /* now we can have script or region or variant */
1365 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1366 goto region_m49;
1367
1368 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1369 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1370 nxt++;
1371 if (nxt - cur == 2)
1372 goto region;
1373 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1374 goto variant;
1375 if (nxt - cur != 4)
1376 return(0);
1377 /* we parsed a script */
1378 script:
1379 if (nxt[0] == 0)
1380 return(1);
1381 if (nxt[0] != '-')
1382 return(0);
1383
1384 nxt++;
1385 cur = nxt;
1386 /* now we can have region or variant */
1387 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388 goto region_m49;
1389
1390 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392 nxt++;
1393
1394 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1395 goto variant;
1396 if (nxt - cur != 2)
1397 return(0);
1398 /* we parsed a region */
1399 region:
1400 if (nxt[0] == 0)
1401 return(1);
1402 if (nxt[0] != '-')
1403 return(0);
1404
1405 nxt++;
1406 cur = nxt;
1407 /* now we can just have a variant */
1408 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1409 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1410 nxt++;
1411
1412 if ((nxt - cur < 5) || (nxt - cur > 8))
1413 return(0);
1414
1415 /* we parsed a variant */
1416 variant:
1417 if (nxt[0] == 0)
1418 return(1);
1419 if (nxt[0] != '-')
1420 return(0);
1421 /* extensions and private use subtags not checked */
1422 return (1);
1423
1424 region_m49:
1425 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1426 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1427 nxt += 3;
1428 goto region;
1429 }
1430 return(0);
1431 }
1432
1433 /************************************************************************
1434 * *
1435 * Parser stacks related functions and macros *
1436 * *
1437 ************************************************************************/
1438
1439 static xmlChar *
1440 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1441
1442 /**
1443 * xmlParserNsCreate:
1444 *
1445 * Create a new namespace database.
1446 *
1447 * Returns the new obejct.
1448 */
1449 xmlParserNsData *
xmlParserNsCreate(void)1450 xmlParserNsCreate(void) {
1451 xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1452
1453 if (nsdb == NULL)
1454 return(NULL);
1455 memset(nsdb, 0, sizeof(*nsdb));
1456 nsdb->defaultNsIndex = INT_MAX;
1457
1458 return(nsdb);
1459 }
1460
1461 /**
1462 * xmlParserNsFree:
1463 * @nsdb: namespace database
1464 *
1465 * Free a namespace database.
1466 */
1467 void
xmlParserNsFree(xmlParserNsData * nsdb)1468 xmlParserNsFree(xmlParserNsData *nsdb) {
1469 if (nsdb == NULL)
1470 return;
1471
1472 xmlFree(nsdb->extra);
1473 xmlFree(nsdb->hash);
1474 xmlFree(nsdb);
1475 }
1476
1477 /**
1478 * xmlParserNsReset:
1479 * @nsdb: namespace database
1480 *
1481 * Reset a namespace database.
1482 */
1483 static void
xmlParserNsReset(xmlParserNsData * nsdb)1484 xmlParserNsReset(xmlParserNsData *nsdb) {
1485 if (nsdb == NULL)
1486 return;
1487
1488 nsdb->hashElems = 0;
1489 nsdb->elementId = 0;
1490 nsdb->defaultNsIndex = INT_MAX;
1491
1492 if (nsdb->hash)
1493 memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1494 }
1495
1496 /**
1497 * xmlParserStartElement:
1498 * @nsdb: namespace database
1499 *
1500 * Signal that a new element has started.
1501 *
1502 * Returns 0 on success, -1 if the element counter overflowed.
1503 */
1504 static int
xmlParserNsStartElement(xmlParserNsData * nsdb)1505 xmlParserNsStartElement(xmlParserNsData *nsdb) {
1506 if (nsdb->elementId == UINT_MAX)
1507 return(-1);
1508 nsdb->elementId++;
1509
1510 return(0);
1511 }
1512
1513 /**
1514 * xmlParserNsLookup:
1515 * @ctxt: parser context
1516 * @prefix: namespace prefix
1517 * @bucketPtr: optional bucket (return value)
1518 *
1519 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1520 * be set to the matching bucket, or the first empty bucket if no match
1521 * was found.
1522 *
1523 * Returns the namespace index on success, INT_MAX if no namespace was
1524 * found.
1525 */
1526 static int
xmlParserNsLookup(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,xmlParserNsBucket ** bucketPtr)1527 xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1528 xmlParserNsBucket **bucketPtr) {
1529 xmlParserNsBucket *bucket;
1530 unsigned index, hashValue;
1531
1532 if (prefix->name == NULL)
1533 return(ctxt->nsdb->defaultNsIndex);
1534
1535 if (ctxt->nsdb->hashSize == 0)
1536 return(INT_MAX);
1537
1538 hashValue = prefix->hashValue;
1539 index = hashValue & (ctxt->nsdb->hashSize - 1);
1540 bucket = &ctxt->nsdb->hash[index];
1541
1542 while (bucket->hashValue) {
1543 if ((bucket->hashValue == hashValue) &&
1544 (bucket->index != INT_MAX)) {
1545 if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546 if (bucketPtr != NULL)
1547 *bucketPtr = bucket;
1548 return(bucket->index);
1549 }
1550 }
1551
1552 index++;
1553 bucket++;
1554 if (index == ctxt->nsdb->hashSize) {
1555 index = 0;
1556 bucket = ctxt->nsdb->hash;
1557 }
1558 }
1559
1560 if (bucketPtr != NULL)
1561 *bucketPtr = bucket;
1562 return(INT_MAX);
1563 }
1564
1565 /**
1566 * xmlParserNsLookupUri:
1567 * @ctxt: parser context
1568 * @prefix: namespace prefix
1569 *
1570 * Lookup namespace URI with given prefix.
1571 *
1572 * Returns the namespace URI on success, NULL if no namespace was found.
1573 */
1574 static const xmlChar *
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix)1575 xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576 const xmlChar *ret;
1577 int nsIndex;
1578
1579 if (prefix->name == ctxt->str_xml)
1580 return(ctxt->str_xml_ns);
1581
1582 /*
1583 * minNsIndex is used when building an entity tree. We must
1584 * ignore namespaces declared outside the entity.
1585 */
1586 nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588 return(NULL);
1589
1590 ret = ctxt->nsTab[nsIndex * 2 + 1];
1591 if (ret[0] == 0)
1592 ret = NULL;
1593 return(ret);
1594 }
1595
1596 /**
1597 * xmlParserNsLookupSax:
1598 * @ctxt: parser context
1599 * @prefix: namespace prefix
1600 *
1601 * Lookup extra data for the given prefix. This returns data stored
1602 * with xmlParserNsUdpateSax.
1603 *
1604 * Returns the data on success, NULL if no namespace was found.
1605 */
1606 void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix)1607 xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608 xmlHashedString hprefix;
1609 int nsIndex;
1610
1611 if (prefix == ctxt->str_xml)
1612 return(NULL);
1613
1614 hprefix.name = prefix;
1615 if (prefix != NULL)
1616 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617 else
1618 hprefix.hashValue = 0;
1619 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621 return(NULL);
1622
1623 return(ctxt->nsdb->extra[nsIndex].saxData);
1624 }
1625
1626 /**
1627 * xmlParserNsUpdateSax:
1628 * @ctxt: parser context
1629 * @prefix: namespace prefix
1630 * @saxData: extra data for SAX handler
1631 *
1632 * Sets or updates extra data for the given prefix. This value will be
1633 * returned by xmlParserNsLookupSax as long as the namespace with the
1634 * given prefix is in scope.
1635 *
1636 * Returns the data on success, NULL if no namespace was found.
1637 */
1638 int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix,void * saxData)1639 xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640 void *saxData) {
1641 xmlHashedString hprefix;
1642 int nsIndex;
1643
1644 if (prefix == ctxt->str_xml)
1645 return(-1);
1646
1647 hprefix.name = prefix;
1648 if (prefix != NULL)
1649 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650 else
1651 hprefix.hashValue = 0;
1652 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654 return(-1);
1655
1656 ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657 return(0);
1658 }
1659
1660 /**
1661 * xmlParserNsGrow:
1662 * @ctxt: parser context
1663 *
1664 * Grows the namespace tables.
1665 *
1666 * Returns 0 on success, -1 if a memory allocation failed.
1667 */
1668 static int
xmlParserNsGrow(xmlParserCtxtPtr ctxt)1669 xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670 const xmlChar **table;
1671 xmlParserNsExtra *extra;
1672 int newSize;
1673
1674 if (ctxt->nsMax > INT_MAX / 2)
1675 goto error;
1676 newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1677
1678 table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1679 if (table == NULL)
1680 goto error;
1681 ctxt->nsTab = table;
1682
1683 extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1684 if (extra == NULL)
1685 goto error;
1686 ctxt->nsdb->extra = extra;
1687
1688 ctxt->nsMax = newSize;
1689 return(0);
1690
1691 error:
1692 xmlErrMemory(ctxt);
1693 return(-1);
1694 }
1695
1696 /**
1697 * xmlParserNsPush:
1698 * @ctxt: parser context
1699 * @prefix: prefix with hash value
1700 * @uri: uri with hash value
1701 * @saxData: extra data for SAX handler
1702 * @defAttr: whether the namespace comes from a default attribute
1703 *
1704 * Push a new namespace on the table.
1705 *
1706 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1707 * -1 if a memory allocation failed.
1708 */
1709 static int
xmlParserNsPush(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,const xmlHashedString * uri,void * saxData,int defAttr)1710 xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1711 const xmlHashedString *uri, void *saxData, int defAttr) {
1712 xmlParserNsBucket *bucket = NULL;
1713 xmlParserNsExtra *extra;
1714 const xmlChar **ns;
1715 unsigned hashValue, nsIndex, oldIndex;
1716
1717 if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1718 return(0);
1719
1720 if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1721 xmlErrMemory(ctxt);
1722 return(-1);
1723 }
1724
1725 /*
1726 * Default namespace and 'xml' namespace
1727 */
1728 if ((prefix == NULL) || (prefix->name == NULL)) {
1729 oldIndex = ctxt->nsdb->defaultNsIndex;
1730
1731 if (oldIndex != INT_MAX) {
1732 extra = &ctxt->nsdb->extra[oldIndex];
1733
1734 if (extra->elementId == ctxt->nsdb->elementId) {
1735 if (defAttr == 0)
1736 xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1737 return(0);
1738 }
1739
1740 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741 (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1742 return(0);
1743 }
1744
1745 ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1746 goto populate_entry;
1747 }
1748
1749 /*
1750 * Hash table lookup
1751 */
1752 oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1753 if (oldIndex != INT_MAX) {
1754 extra = &ctxt->nsdb->extra[oldIndex];
1755
1756 /*
1757 * Check for duplicate definitions on the same element.
1758 */
1759 if (extra->elementId == ctxt->nsdb->elementId) {
1760 if (defAttr == 0)
1761 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1762 return(0);
1763 }
1764
1765 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1766 (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1767 return(0);
1768
1769 bucket->index = ctxt->nsNr;
1770 goto populate_entry;
1771 }
1772
1773 /*
1774 * Insert new bucket
1775 */
1776
1777 hashValue = prefix->hashValue;
1778
1779 /*
1780 * Grow hash table, 50% fill factor
1781 */
1782 if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1783 xmlParserNsBucket *newHash;
1784 unsigned newSize, i, index;
1785
1786 if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1787 xmlErrMemory(ctxt);
1788 return(-1);
1789 }
1790 newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1791 newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1792 if (newHash == NULL) {
1793 xmlErrMemory(ctxt);
1794 return(-1);
1795 }
1796 memset(newHash, 0, newSize * sizeof(newHash[0]));
1797
1798 for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1799 unsigned hv = ctxt->nsdb->hash[i].hashValue;
1800 unsigned newIndex;
1801
1802 if (hv == 0)
1803 continue;
1804 newIndex = hv & (newSize - 1);
1805
1806 while (newHash[newIndex].hashValue != 0) {
1807 newIndex++;
1808 if (newIndex == newSize)
1809 newIndex = 0;
1810 }
1811
1812 newHash[newIndex] = ctxt->nsdb->hash[i];
1813 }
1814
1815 xmlFree(ctxt->nsdb->hash);
1816 ctxt->nsdb->hash = newHash;
1817 ctxt->nsdb->hashSize = newSize;
1818
1819 /*
1820 * Relookup
1821 */
1822 index = hashValue & (newSize - 1);
1823
1824 while (newHash[index].hashValue != 0) {
1825 index++;
1826 if (index == newSize)
1827 index = 0;
1828 }
1829
1830 bucket = &newHash[index];
1831 }
1832
1833 bucket->hashValue = hashValue;
1834 bucket->index = ctxt->nsNr;
1835 ctxt->nsdb->hashElems++;
1836 oldIndex = INT_MAX;
1837
1838 populate_entry:
1839 nsIndex = ctxt->nsNr;
1840
1841 ns = &ctxt->nsTab[nsIndex * 2];
1842 ns[0] = prefix ? prefix->name : NULL;
1843 ns[1] = uri->name;
1844
1845 extra = &ctxt->nsdb->extra[nsIndex];
1846 extra->saxData = saxData;
1847 extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1848 extra->uriHashValue = uri->hashValue;
1849 extra->elementId = ctxt->nsdb->elementId;
1850 extra->oldIndex = oldIndex;
1851
1852 ctxt->nsNr++;
1853
1854 return(1);
1855 }
1856
1857 /**
1858 * xmlParserNsPop:
1859 * @ctxt: an XML parser context
1860 * @nr: the number to pop
1861 *
1862 * Pops the top @nr namespaces and restores the hash table.
1863 *
1864 * Returns the number of namespaces popped.
1865 */
1866 static int
xmlParserNsPop(xmlParserCtxtPtr ctxt,int nr)1867 xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1868 {
1869 int i;
1870
1871 /* assert(nr <= ctxt->nsNr); */
1872
1873 for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1874 const xmlChar *prefix = ctxt->nsTab[i * 2];
1875 xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1876
1877 if (prefix == NULL) {
1878 ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1879 } else {
1880 xmlHashedString hprefix;
1881 xmlParserNsBucket *bucket = NULL;
1882
1883 hprefix.name = prefix;
1884 hprefix.hashValue = extra->prefixHashValue;
1885 xmlParserNsLookup(ctxt, &hprefix, &bucket);
1886 /* assert(bucket && bucket->hashValue); */
1887 bucket->index = extra->oldIndex;
1888 }
1889 }
1890
1891 ctxt->nsNr -= nr;
1892 return(nr);
1893 }
1894
1895 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1896 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1897 const xmlChar **atts;
1898 unsigned *attallocs;
1899 int maxatts;
1900
1901 if (nr + 5 > ctxt->maxatts) {
1902 maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1903 atts = (const xmlChar **) xmlMalloc(
1904 maxatts * sizeof(const xmlChar *));
1905 if (atts == NULL) goto mem_error;
1906 attallocs = xmlRealloc(ctxt->attallocs,
1907 (maxatts / 5) * sizeof(attallocs[0]));
1908 if (attallocs == NULL) {
1909 xmlFree(atts);
1910 goto mem_error;
1911 }
1912 if (ctxt->maxatts > 0)
1913 memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1914 xmlFree(ctxt->atts);
1915 ctxt->atts = atts;
1916 ctxt->attallocs = attallocs;
1917 ctxt->maxatts = maxatts;
1918 }
1919 return(ctxt->maxatts);
1920 mem_error:
1921 xmlErrMemory(ctxt);
1922 return(-1);
1923 }
1924
1925 /**
1926 * inputPush:
1927 * @ctxt: an XML parser context
1928 * @value: the parser input
1929 *
1930 * Pushes a new parser input on top of the input stack
1931 *
1932 * Returns -1 in case of error, the index in the stack otherwise
1933 */
1934 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1935 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1936 {
1937 if ((ctxt == NULL) || (value == NULL))
1938 return(-1);
1939 if (ctxt->inputNr >= ctxt->inputMax) {
1940 size_t newSize = ctxt->inputMax * 2;
1941 xmlParserInputPtr *tmp;
1942
1943 tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1944 newSize * sizeof(*tmp));
1945 if (tmp == NULL) {
1946 xmlErrMemory(ctxt);
1947 return (-1);
1948 }
1949 ctxt->inputTab = tmp;
1950 ctxt->inputMax = newSize;
1951 }
1952 ctxt->inputTab[ctxt->inputNr] = value;
1953 ctxt->input = value;
1954 return (ctxt->inputNr++);
1955 }
1956 /**
1957 * inputPop:
1958 * @ctxt: an XML parser context
1959 *
1960 * Pops the top parser input from the input stack
1961 *
1962 * Returns the input just removed
1963 */
1964 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1965 inputPop(xmlParserCtxtPtr ctxt)
1966 {
1967 xmlParserInputPtr ret;
1968
1969 if (ctxt == NULL)
1970 return(NULL);
1971 if (ctxt->inputNr <= 0)
1972 return (NULL);
1973 ctxt->inputNr--;
1974 if (ctxt->inputNr > 0)
1975 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1976 else
1977 ctxt->input = NULL;
1978 ret = ctxt->inputTab[ctxt->inputNr];
1979 ctxt->inputTab[ctxt->inputNr] = NULL;
1980 return (ret);
1981 }
1982 /**
1983 * nodePush:
1984 * @ctxt: an XML parser context
1985 * @value: the element node
1986 *
1987 * DEPRECATED: Internal function, do not use.
1988 *
1989 * Pushes a new element node on top of the node stack
1990 *
1991 * Returns -1 in case of error, the index in the stack otherwise
1992 */
1993 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1994 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1995 {
1996 int maxDepth;
1997
1998 if (ctxt == NULL)
1999 return(0);
2000
2001 maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2002 if (ctxt->nodeNr > maxDepth) {
2003 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2005 ctxt->nodeNr);
2006 xmlHaltParser(ctxt);
2007 return(-1);
2008 }
2009 if (ctxt->nodeNr >= ctxt->nodeMax) {
2010 xmlNodePtr *tmp;
2011
2012 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2013 ctxt->nodeMax * 2 *
2014 sizeof(ctxt->nodeTab[0]));
2015 if (tmp == NULL) {
2016 xmlErrMemory(ctxt);
2017 return (-1);
2018 }
2019 ctxt->nodeTab = tmp;
2020 ctxt->nodeMax *= 2;
2021 }
2022 ctxt->nodeTab[ctxt->nodeNr] = value;
2023 ctxt->node = value;
2024 return (ctxt->nodeNr++);
2025 }
2026
2027 /**
2028 * nodePop:
2029 * @ctxt: an XML parser context
2030 *
2031 * DEPRECATED: Internal function, do not use.
2032 *
2033 * Pops the top element node from the node stack
2034 *
2035 * Returns the node just removed
2036 */
2037 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)2038 nodePop(xmlParserCtxtPtr ctxt)
2039 {
2040 xmlNodePtr ret;
2041
2042 if (ctxt == NULL) return(NULL);
2043 if (ctxt->nodeNr <= 0)
2044 return (NULL);
2045 ctxt->nodeNr--;
2046 if (ctxt->nodeNr > 0)
2047 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2048 else
2049 ctxt->node = NULL;
2050 ret = ctxt->nodeTab[ctxt->nodeNr];
2051 ctxt->nodeTab[ctxt->nodeNr] = NULL;
2052 return (ret);
2053 }
2054
2055 /**
2056 * nameNsPush:
2057 * @ctxt: an XML parser context
2058 * @value: the element name
2059 * @prefix: the element prefix
2060 * @URI: the element namespace name
2061 * @line: the current line number for error messages
2062 * @nsNr: the number of namespaces pushed on the namespace table
2063 *
2064 * Pushes a new element name/prefix/URL on top of the name stack
2065 *
2066 * Returns -1 in case of error, the index in the stack otherwise
2067 */
2068 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)2069 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2070 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2071 {
2072 xmlStartTag *tag;
2073
2074 if (ctxt->nameNr >= ctxt->nameMax) {
2075 const xmlChar * *tmp;
2076 xmlStartTag *tmp2;
2077 ctxt->nameMax *= 2;
2078 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2079 ctxt->nameMax *
2080 sizeof(ctxt->nameTab[0]));
2081 if (tmp == NULL) {
2082 ctxt->nameMax /= 2;
2083 goto mem_error;
2084 }
2085 ctxt->nameTab = tmp;
2086 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2087 ctxt->nameMax *
2088 sizeof(ctxt->pushTab[0]));
2089 if (tmp2 == NULL) {
2090 ctxt->nameMax /= 2;
2091 goto mem_error;
2092 }
2093 ctxt->pushTab = tmp2;
2094 } else if (ctxt->pushTab == NULL) {
2095 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2096 sizeof(ctxt->pushTab[0]));
2097 if (ctxt->pushTab == NULL)
2098 goto mem_error;
2099 }
2100 ctxt->nameTab[ctxt->nameNr] = value;
2101 ctxt->name = value;
2102 tag = &ctxt->pushTab[ctxt->nameNr];
2103 tag->prefix = prefix;
2104 tag->URI = URI;
2105 tag->line = line;
2106 tag->nsNr = nsNr;
2107 return (ctxt->nameNr++);
2108 mem_error:
2109 xmlErrMemory(ctxt);
2110 return (-1);
2111 }
2112 #ifdef LIBXML_PUSH_ENABLED
2113 /**
2114 * nameNsPop:
2115 * @ctxt: an XML parser context
2116 *
2117 * Pops the top element/prefix/URI name from the name stack
2118 *
2119 * Returns the name just removed
2120 */
2121 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)2122 nameNsPop(xmlParserCtxtPtr ctxt)
2123 {
2124 const xmlChar *ret;
2125
2126 if (ctxt->nameNr <= 0)
2127 return (NULL);
2128 ctxt->nameNr--;
2129 if (ctxt->nameNr > 0)
2130 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2131 else
2132 ctxt->name = NULL;
2133 ret = ctxt->nameTab[ctxt->nameNr];
2134 ctxt->nameTab[ctxt->nameNr] = NULL;
2135 return (ret);
2136 }
2137 #endif /* LIBXML_PUSH_ENABLED */
2138
2139 /**
2140 * namePush:
2141 * @ctxt: an XML parser context
2142 * @value: the element name
2143 *
2144 * DEPRECATED: Internal function, do not use.
2145 *
2146 * Pushes a new element name on top of the name stack
2147 *
2148 * Returns -1 in case of error, the index in the stack otherwise
2149 */
2150 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)2151 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2152 {
2153 if (ctxt == NULL) return (-1);
2154
2155 if (ctxt->nameNr >= ctxt->nameMax) {
2156 const xmlChar * *tmp;
2157 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2158 ctxt->nameMax * 2 *
2159 sizeof(ctxt->nameTab[0]));
2160 if (tmp == NULL) {
2161 goto mem_error;
2162 }
2163 ctxt->nameTab = tmp;
2164 ctxt->nameMax *= 2;
2165 }
2166 ctxt->nameTab[ctxt->nameNr] = value;
2167 ctxt->name = value;
2168 return (ctxt->nameNr++);
2169 mem_error:
2170 xmlErrMemory(ctxt);
2171 return (-1);
2172 }
2173
2174 /**
2175 * namePop:
2176 * @ctxt: an XML parser context
2177 *
2178 * DEPRECATED: Internal function, do not use.
2179 *
2180 * Pops the top element name from the name stack
2181 *
2182 * Returns the name just removed
2183 */
2184 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)2185 namePop(xmlParserCtxtPtr ctxt)
2186 {
2187 const xmlChar *ret;
2188
2189 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2190 return (NULL);
2191 ctxt->nameNr--;
2192 if (ctxt->nameNr > 0)
2193 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2194 else
2195 ctxt->name = NULL;
2196 ret = ctxt->nameTab[ctxt->nameNr];
2197 ctxt->nameTab[ctxt->nameNr] = NULL;
2198 return (ret);
2199 }
2200
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the ctxt->spaceTab stack and points ctxt->space
 * at the new top slot. The stack capacity is doubled when it is full.
 *
 * Returns -1 if the reallocation failed, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int grown = ctxt->spaceMax * 2;
        int *table;

        table = (int *) xmlRealloc(ctxt->spaceTab,
                                   grown * sizeof(ctxt->spaceTab[0]));
        if (table == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }
        ctxt->spaceTab = table;
        ctxt->spaceMax = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2219
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the ctxt->spaceTab stack. ctxt->space is
 * pointed at the slot below it, or at slot 0 when the stack becomes
 * empty. The vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = ctxt->spaceNr - 1;
    ctxt->spaceNr = top;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2232
2233 /*
2234 * Macros for accessing the content. Those should be used only by the parser,
2235 * and not exported.
2236 *
2237 * Dirty macros, i.e. one often need to make assumption on the context to
2238 * use them
2239 *
2240 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2241 * To be used with extreme caution since operations consuming
2242 * characters may move the input buffer to a different location !
2243 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
2244 * This should be used internally by the parser
2245 * only to compare to ASCII values otherwise it would break when
2246 * running with UTF-8 encoding.
2247 * RAW same as CUR but in the input buffer, bypass any token
2248 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
2251 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2252 * strings without newlines within the parser.
2253 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2254 * defined char within the parser.
2255 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2256 *
2257 * NEXT Skip to the next character, this does the proper decoding
2258 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2259 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2260 * CUR_CHAR(l) returns the current unicode character (int), set l
2261 * to the number of xmlChars used for the encoding [0-5].
2262 * CUR_SCHAR same but operate on a string instead of the context
2263 * COPY_BUF copy the current unicode char to the target buffer, increment
2264 * the index
2265 * GROW, SHRINK handling of input buffers
2266 */
2267
/*
 * Raw accessors for the current input. Every macro below expands to an
 * expression on a variable named ctxt, which must be in scope at the
 * point of use.
 */

/* RAW and CUR expand to the same expression: the xmlChar under the cursor. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* The xmlChar val positions after the cursor; no bounds check is made. */
#define NXT(val) ctxt->input->cur[(val)]
/* The cursor pointer itself (usable as an lvalue). */
#define CUR_PTR ctxt->input->cur
/* The base pointer of the current input. */
#define BASE_PTR ctxt->input->base
2273
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn in order. Comparison stops at the first
 * mismatch, so no byte beyond a mismatch is read.
 *
 * Every macro parameter is parenthesized so that an argument which is
 * itself an expression (e.g. "cond ? a : b") is cast and compared as a
 * whole. Note that s is expanded once per compared byte, so it must not
 * have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2291
/*
 * SKIP(val): advance the cursor and the column counter by val xmlChars,
 * then call xmlParserGrow() if the cursor now sits on a 0 byte. The line
 * counter is not touched, so the skipped text must not contain newlines.
 * val is expanded twice and must not have side effects.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SKIPL(val): like SKIP, but walks the val xmlChars one by one so that a
 * '\n' increments the line counter and resets the column to 1.
 * val is parenthesized in the loop bound so that an expression argument
 * such as "cond ? 3 : 1" is compared as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)
2309
/*
 * SHRINK: unless PARSER_PROGRESSIVE(ctxt) is set, call xmlParserShrink()
 * once more than 2 * INPUT_CHUNK xmlChars lie between input->base and the
 * cursor while fewer than 2 * INPUT_CHUNK remain before input->end.
 *
 * NOTE(review): the expansion is a bare "if" statement that already ends
 * in ';'. It has to be used as a statement of its own ("SHRINK;") and
 * cannot be followed by an "else".
 */
#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
	(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

/*
 * GROW: unless PARSER_PROGRESSIVE(ctxt) is set, call xmlParserGrow() when
 * fewer than INPUT_CHUNK xmlChars remain between the cursor and input->end.
 *
 * NOTE(review): same expansion shape as SHRINK (bare "if" ending in ';'),
 * so the same usage restrictions apply.
 */
#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
	(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);
2320
/* Skip whitespace at the cursor; evaluates to xmlSkipBlankChars()'s result. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Same, through xmlSkipBlankCharsPE(), which also handles parameter entities. */
#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)
2326
/*
 * NEXT1: advance the column counter and the cursor by exactly one xmlChar,
 * then call xmlParserGrow() if the cursor now sits on a 0 byte. The line
 * counter is not touched, so the skipped char must not be a newline.
 *
 * NOTE(review): the expansion is a plain brace block, not do/while(0), so
 * "NEXT1;" leaves an extra empty statement behind and cannot be followed
 * by an "else".
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

/*
 * NEXTL(l): step over the current character, which occupies l xmlChars.
 * A '\n' under the cursor increments the line counter and resets the
 * column to 1; anything else increments the column once. Unlike NEXT1
 * and SKIP this macro never calls xmlParserGrow().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)
2340
/*
 * CUR_CHAR(l): current character decoded by xmlCurrentChar(); the number
 * of xmlChars it occupies is stored in l.
 * CUR_SCHAR(s, l): same, decoding from the string s through
 * xmlStringCurrentChar().
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(b, i, v): append the character v to buffer b at index i and
 * advance i. Values below 0x80 are stored as a single xmlChar, anything
 * else goes through xmlCopyCharMultiByte(), whose return value is added
 * to i.
 *
 * All parameters are parenthesized so that an expression argument such as
 * "x & 0x7F" is compared and stored as a whole. The expansion deliberately
 * stays an if/else statement without a trailing ';' so existing call sites
 * keep compiling; v and i are expanded more than once and must not have
 * side effects.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2347
2348 /**
2349 * xmlSkipBlankChars:
2350 * @ctxt: the XML parser context
2351 *
2352 * DEPRECATED: Internal function, do not use.
2353 *
2354 * Skip whitespace in the input stream.
2355 *
2356 * Returns the number of space chars skipped
2357 */
2358 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2359 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2360 const xmlChar *cur;
2361 int res = 0;
2362
2363 /*
2364 * It's Okay to use CUR/NEXT here since all the blanks are on
2365 * the ASCII range.
2366 */
2367 cur = ctxt->input->cur;
2368 while (IS_BLANK_CH(*cur)) {
2369 if (*cur == '\n') {
2370 ctxt->input->line++; ctxt->input->col = 1;
2371 } else {
2372 ctxt->input->col++;
2373 }
2374 cur++;
2375 if (res < INT_MAX)
2376 res++;
2377 if (*cur == 0) {
2378 ctxt->input->cur = cur;
2379 xmlParserGrow(ctxt);
2380 cur = ctxt->input->cur;
2381 }
2382 }
2383 ctxt->input->cur = cur;
2384
2385 return(res);
2386 }
2387
2388 static void
xmlPopPE(xmlParserCtxtPtr ctxt)2389 xmlPopPE(xmlParserCtxtPtr ctxt) {
2390 unsigned long consumed;
2391 xmlEntityPtr ent;
2392
2393 ent = ctxt->input->entity;
2394
2395 ent->flags &= ~XML_ENT_EXPANDING;
2396
2397 if ((ent->flags & XML_ENT_CHECKED) == 0) {
2398 int result;
2399
2400 /*
2401 * Read the rest of the stream in case of errors. We want
2402 * to account for the whole entity size.
2403 */
2404 do {
2405 ctxt->input->cur = ctxt->input->end;
2406 xmlParserShrink(ctxt);
2407 result = xmlParserGrow(ctxt);
2408 } while (result > 0);
2409
2410 consumed = ctxt->input->consumed;
2411 xmlSaturatedAddSizeT(&consumed,
2412 ctxt->input->end - ctxt->input->base);
2413
2414 xmlSaturatedAdd(&ent->expandedSize, consumed);
2415
2416 /*
2417 * Add to sizeentities when parsing an external entity
2418 * for the first time.
2419 */
2420 if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2421 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2422 }
2423
2424 ent->flags |= XML_ENT_CHECKED;
2425 }
2426
2427 xmlPopInput(ctxt);
2428
2429 xmlParserEntityCheck(ctxt, ent->expandedSize);
2430 }
2431
2432 /**
2433 * xmlSkipBlankCharsPE:
2434 * @ctxt: the XML parser context
2435 *
2436 * Skip whitespace in the input stream, also handling parameter
2437 * entities.
2438 *
2439 * Returns the number of space chars skipped
2440 */
2441 static int
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt)2442 xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2443 int res = 0;
2444 int inParam;
2445 int expandParam;
2446
2447 inParam = PARSER_IN_PE(ctxt);
2448 expandParam = PARSER_EXTERNAL(ctxt);
2449
2450 if (!inParam && !expandParam)
2451 return(xmlSkipBlankChars(ctxt));
2452
2453 while (PARSER_STOPPED(ctxt) == 0) {
2454 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2455 NEXT;
2456 } else if (CUR == '%') {
2457 if ((expandParam == 0) ||
2458 (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2459 break;
2460
2461 /*
2462 * Expand parameter entity. We continue to consume
2463 * whitespace at the start of the entity and possible
2464 * even consume the whole entity and pop it. We might
2465 * even pop multiple PEs in this loop.
2466 */
2467 xmlParsePEReference(ctxt);
2468
2469 inParam = PARSER_IN_PE(ctxt);
2470 expandParam = PARSER_EXTERNAL(ctxt);
2471 } else if (CUR == 0) {
2472 if (inParam == 0)
2473 break;
2474
2475 xmlPopPE(ctxt);
2476
2477 inParam = PARSER_IN_PE(ctxt);
2478 expandParam = PARSER_EXTERNAL(ctxt);
2479 } else {
2480 break;
2481 }
2482
2483 /*
2484 * Also increase the counter when entering or exiting a PERef.
2485 * The spec says: "When a parameter-entity reference is recognized
2486 * in the DTD and included, its replacement text MUST be enlarged
2487 * by the attachment of one leading and one following space (#x20)
2488 * character."
2489 */
2490 if (res < INT_MAX)
2491 res++;
2492 }
2493
2494 return(res);
2495 }
2496
2497 /************************************************************************
2498 * *
2499 * Commodity functions to handle entities *
2500 * *
2501 ************************************************************************/
2502
2503 /**
2504 * xmlPopInput:
2505 * @ctxt: an XML parser context
2506 *
2507 * xmlPopInput: the current input pointed by ctxt->input came to an end
2508 * pop it and return the next char.
2509 *
2510 * Returns the current xmlChar in the parser context
2511 */
2512 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2513 xmlPopInput(xmlParserCtxtPtr ctxt) {
2514 xmlParserInputPtr input;
2515
2516 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2517 input = inputPop(ctxt);
2518 xmlFreeInputStream(input);
2519 if (*ctxt->input->cur == 0)
2520 xmlParserGrow(ctxt);
2521 return(CUR);
2522 }
2523
2524 /**
2525 * xmlPushInput:
2526 * @ctxt: an XML parser context
2527 * @input: an XML parser input fragment (entity, XML fragment ...).
2528 *
2529 * Push an input stream onto the stack.
2530 *
2531 * This makes the parser use an input returned from advanced functions
2532 * like xmlNewInputURL or xmlNewInputMemory.
2533 *
2534 * Returns -1 in case of error or the index in the input stack
2535 */
2536 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2537 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2538 int maxDepth;
2539 int ret;
2540
2541 if ((ctxt == NULL) || (input == NULL))
2542 return(-1);
2543
2544 maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2545 if (ctxt->inputNr > maxDepth) {
2546 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2547 "Maximum entity nesting depth exceeded");
2548 xmlHaltParser(ctxt);
2549 return(-1);
2550 }
2551 ret = inputPush(ctxt, input);
2552 GROW;
2553 return(ret);
2554 }
2555
2556 /**
2557 * xmlParseCharRef:
2558 * @ctxt: an XML parser context
2559 *
2560 * DEPRECATED: Internal function, don't use.
2561 *
2562 * Parse a numeric character reference. Always consumes '&'.
2563 *
2564 * [66] CharRef ::= '&#' [0-9]+ ';' |
2565 * '&#x' [0-9a-fA-F]+ ';'
2566 *
2567 * [ WFC: Legal Character ]
2568 * Characters referred to using character references must match the
2569 * production for Char.
2570 *
2571 * Returns the value parsed (as an int), 0 in case of error
2572 */
2573 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2574 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2575 int val = 0;
2576 int count = 0;
2577
2578 /*
2579 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2580 */
2581 if ((RAW == '&') && (NXT(1) == '#') &&
2582 (NXT(2) == 'x')) {
2583 SKIP(3);
2584 GROW;
2585 while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2586 if (count++ > 20) {
2587 count = 0;
2588 GROW;
2589 }
2590 if ((RAW >= '0') && (RAW <= '9'))
2591 val = val * 16 + (CUR - '0');
2592 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2593 val = val * 16 + (CUR - 'a') + 10;
2594 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2595 val = val * 16 + (CUR - 'A') + 10;
2596 else {
2597 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2598 val = 0;
2599 break;
2600 }
2601 if (val > 0x110000)
2602 val = 0x110000;
2603
2604 NEXT;
2605 count++;
2606 }
2607 if (RAW == ';') {
2608 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2609 ctxt->input->col++;
2610 ctxt->input->cur++;
2611 }
2612 } else if ((RAW == '&') && (NXT(1) == '#')) {
2613 SKIP(2);
2614 GROW;
2615 while (RAW != ';') { /* loop blocked by count */
2616 if (count++ > 20) {
2617 count = 0;
2618 GROW;
2619 }
2620 if ((RAW >= '0') && (RAW <= '9'))
2621 val = val * 10 + (CUR - '0');
2622 else {
2623 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2624 val = 0;
2625 break;
2626 }
2627 if (val > 0x110000)
2628 val = 0x110000;
2629
2630 NEXT;
2631 count++;
2632 }
2633 if (RAW == ';') {
2634 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2635 ctxt->input->col++;
2636 ctxt->input->cur++;
2637 }
2638 } else {
2639 if (RAW == '&')
2640 SKIP(1);
2641 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2642 }
2643
2644 /*
2645 * [ WFC: Legal Character ]
2646 * Characters referred to using character references must match the
2647 * production for Char.
2648 */
2649 if (val >= 0x110000) {
2650 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2651 "xmlParseCharRef: character reference out of bounds\n",
2652 val);
2653 } else if (IS_CHAR(val)) {
2654 return(val);
2655 } else {
2656 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2657 "xmlParseCharRef: invalid xmlChar value %d\n",
2658 val);
2659 }
2660 return(0);
2661 }
2662
2663 /**
2664 * xmlParseStringCharRef:
2665 * @ctxt: an XML parser context
2666 * @str: a pointer to an index in the string
2667 *
2668 * parse Reference declarations, variant parsing from a string rather
2669 * than an an input flow.
2670 *
2671 * [66] CharRef ::= '&#' [0-9]+ ';' |
2672 * '&#x' [0-9a-fA-F]+ ';'
2673 *
2674 * [ WFC: Legal Character ]
2675 * Characters referred to using character references must match the
2676 * production for Char.
2677 *
2678 * Returns the value parsed (as an int), 0 in case of error, str will be
2679 * updated to the current value of the index
2680 */
2681 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2682 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2683 const xmlChar *ptr;
2684 xmlChar cur;
2685 int val = 0;
2686
2687 if ((str == NULL) || (*str == NULL)) return(0);
2688 ptr = *str;
2689 cur = *ptr;
2690 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2691 ptr += 3;
2692 cur = *ptr;
2693 while (cur != ';') { /* Non input consuming loop */
2694 if ((cur >= '0') && (cur <= '9'))
2695 val = val * 16 + (cur - '0');
2696 else if ((cur >= 'a') && (cur <= 'f'))
2697 val = val * 16 + (cur - 'a') + 10;
2698 else if ((cur >= 'A') && (cur <= 'F'))
2699 val = val * 16 + (cur - 'A') + 10;
2700 else {
2701 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2702 val = 0;
2703 break;
2704 }
2705 if (val > 0x110000)
2706 val = 0x110000;
2707
2708 ptr++;
2709 cur = *ptr;
2710 }
2711 if (cur == ';')
2712 ptr++;
2713 } else if ((cur == '&') && (ptr[1] == '#')){
2714 ptr += 2;
2715 cur = *ptr;
2716 while (cur != ';') { /* Non input consuming loops */
2717 if ((cur >= '0') && (cur <= '9'))
2718 val = val * 10 + (cur - '0');
2719 else {
2720 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2721 val = 0;
2722 break;
2723 }
2724 if (val > 0x110000)
2725 val = 0x110000;
2726
2727 ptr++;
2728 cur = *ptr;
2729 }
2730 if (cur == ';')
2731 ptr++;
2732 } else {
2733 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2734 return(0);
2735 }
2736 *str = ptr;
2737
2738 /*
2739 * [ WFC: Legal Character ]
2740 * Characters referred to using character references must match the
2741 * production for Char.
2742 */
2743 if (val >= 0x110000) {
2744 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2745 "xmlParseStringCharRef: character reference out of bounds\n",
2746 val);
2747 } else if (IS_CHAR(val)) {
2748 return(val);
2749 } else {
2750 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2751 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2752 val);
2753 }
2754 return(0);
2755 }
2756
/**
 * xmlParserHandlePEReference:
 * @ctxt:  the parser context
 *
 * DEPRECATED: Internal function, do not use.
 *
 * This function only forwards to xmlParsePEReference(); it has no logic
 * of its own.
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream;
 * the handling is done according to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    xmlParsePEReference(ctxt);
}
2795
2796 /**
2797 * xmlStringLenDecodeEntities:
2798 * @ctxt: the parser context
2799 * @str: the input string
2800 * @len: the string length
2801 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802 * @end: an end marker xmlChar, 0 if none
2803 * @end2: an end marker xmlChar, 0 if none
2804 * @end3: an end marker xmlChar, 0 if none
2805 *
2806 * DEPRECATED: Internal function, don't use.
2807 *
2808 * Returns A newly allocated string with the substitution done. The caller
2809 * must deallocate it !
2810 */
2811 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2812 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813 int what ATTRIBUTE_UNUSED,
2814 xmlChar end, xmlChar end2, xmlChar end3) {
2815 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2816 return(NULL);
2817
2818 if ((str[len] != 0) ||
2819 (end != 0) || (end2 != 0) || (end3 != 0))
2820 return(NULL);
2821
2822 return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2823 }
2824
2825 /**
2826 * xmlStringDecodeEntities:
2827 * @ctxt: the parser context
2828 * @str: the input string
2829 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2830 * @end: an end marker xmlChar, 0 if none
2831 * @end2: an end marker xmlChar, 0 if none
2832 * @end3: an end marker xmlChar, 0 if none
2833 *
2834 * DEPRECATED: Internal function, don't use.
2835 *
2836 * Returns A newly allocated string with the substitution done. The caller
2837 * must deallocate it !
2838 */
2839 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2840 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2841 int what ATTRIBUTE_UNUSED,
2842 xmlChar end, xmlChar end2, xmlChar end3) {
2843 if ((ctxt == NULL) || (str == NULL))
2844 return(NULL);
2845
2846 if ((end != 0) || (end2 != 0) || (end3 != 0))
2847 return(NULL);
2848
2849 return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2850 }
2851
2852 /************************************************************************
2853 * *
2854 * Commodity functions, cleanup needed ? *
2855 * *
2856 ************************************************************************/
2857
2858 /**
2859 * areBlanks:
2860 * @ctxt: an XML parser context
2861 * @str: a xmlChar *
2862 * @len: the size of @str
2863 * @blank_chars: we know the chars are blanks
2864 *
2865 * Is this a sequence of blank chars that one can ignore ?
2866 *
2867 * Returns 1 if ignorable 0 otherwise.
2868 */
2869
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2870 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871 int blank_chars) {
2872 int i;
2873 xmlNodePtr lastChild;
2874
2875 /*
2876 * Don't spend time trying to differentiate them, the same callback is
2877 * used !
2878 */
2879 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880 return(0);
2881
2882 /*
2883 * Check for xml:space value.
2884 */
2885 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886 (*(ctxt->space) == -2))
2887 return(0);
2888
2889 /*
2890 * Check that the string is made of blanks
2891 */
2892 if (blank_chars == 0) {
2893 for (i = 0;i < len;i++)
2894 if (!(IS_BLANK_CH(str[i]))) return(0);
2895 }
2896
2897 /*
2898 * Look if the element is mixed content in the DTD if available
2899 */
2900 if (ctxt->node == NULL) return(0);
2901 if (ctxt->myDoc != NULL) {
2902 xmlElementPtr elemDecl = NULL;
2903 xmlDocPtr doc = ctxt->myDoc;
2904 const xmlChar *prefix = NULL;
2905
2906 if (ctxt->node->ns)
2907 prefix = ctxt->node->ns->prefix;
2908 if (doc->intSubset != NULL)
2909 elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2910 prefix);
2911 if ((elemDecl == NULL) && (doc->extSubset != NULL))
2912 elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2913 prefix);
2914 if (elemDecl != NULL) {
2915 if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2916 return(1);
2917 if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2918 (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2919 return(0);
2920 }
2921 }
2922
2923 /*
2924 * Otherwise, heuristic :-\
2925 */
2926 if ((RAW != '<') && (RAW != 0xD)) return(0);
2927 if ((ctxt->node->children == NULL) &&
2928 (RAW == '<') && (NXT(1) == '/')) return(0);
2929
2930 lastChild = xmlGetLastChild(ctxt->node);
2931 if (lastChild == NULL) {
2932 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2933 (ctxt->node->content != NULL)) return(0);
2934 } else if (xmlNodeIsText(lastChild))
2935 return(0);
2936 else if ((ctxt->node->children != NULL) &&
2937 (xmlNodeIsText(ctxt->node->children)))
2938 return(0);
2939 return(1);
2940 }
2941
2942 /************************************************************************
2943 * *
2944 * Extra stuff for namespace support *
2945 * Relates to http://www.w3.org/TR/WD-xml-names *
2946 * *
2947 ************************************************************************/
2948
2949 /**
2950 * xmlSplitQName:
2951 * @ctxt: an XML parser context
2952 * @name: an XML parser context
2953 * @prefixOut: a xmlChar **
2954 *
2955 * parse an UTF8 encoded XML qualified name string
2956 *
2957 * [NS 5] QName ::= (Prefix ':')? LocalPart
2958 *
2959 * [NS 6] Prefix ::= NCName
2960 *
2961 * [NS 7] LocalPart ::= NCName
2962 *
2963 * Returns the local part, and prefix is updated
2964 * to get the Prefix if any.
2965 */
2966
2967 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefixOut)2968 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2969 xmlChar buf[XML_MAX_NAMELEN + 5];
2970 xmlChar *buffer = NULL;
2971 int len = 0;
2972 int max = XML_MAX_NAMELEN;
2973 xmlChar *ret = NULL;
2974 xmlChar *prefix;
2975 const xmlChar *cur = name;
2976 int c;
2977
2978 if (prefixOut == NULL) return(NULL);
2979 *prefixOut = NULL;
2980
2981 if (cur == NULL) return(NULL);
2982
2983 #ifndef XML_XML_NAMESPACE
2984 /* xml: prefix is not really a namespace */
2985 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986 (cur[2] == 'l') && (cur[3] == ':'))
2987 return(xmlStrdup(name));
2988 #endif
2989
2990 /* nasty but well=formed */
2991 if (cur[0] == ':')
2992 return(xmlStrdup(name));
2993
2994 c = *cur++;
2995 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996 buf[len++] = c;
2997 c = *cur++;
2998 }
2999 if (len >= max) {
3000 /*
3001 * Okay someone managed to make a huge name, so he's ready to pay
3002 * for the processing speed.
3003 */
3004 max = len * 2;
3005
3006 buffer = (xmlChar *) xmlMallocAtomic(max);
3007 if (buffer == NULL) {
3008 xmlErrMemory(ctxt);
3009 return(NULL);
3010 }
3011 memcpy(buffer, buf, len);
3012 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013 if (len + 10 > max) {
3014 xmlChar *tmp;
3015
3016 max *= 2;
3017 tmp = (xmlChar *) xmlRealloc(buffer, max);
3018 if (tmp == NULL) {
3019 xmlFree(buffer);
3020 xmlErrMemory(ctxt);
3021 return(NULL);
3022 }
3023 buffer = tmp;
3024 }
3025 buffer[len++] = c;
3026 c = *cur++;
3027 }
3028 buffer[len] = 0;
3029 }
3030
3031 if ((c == ':') && (*cur == 0)) {
3032 if (buffer != NULL)
3033 xmlFree(buffer);
3034 return(xmlStrdup(name));
3035 }
3036
3037 if (buffer == NULL) {
3038 ret = xmlStrndup(buf, len);
3039 if (ret == NULL) {
3040 xmlErrMemory(ctxt);
3041 return(NULL);
3042 }
3043 } else {
3044 ret = buffer;
3045 buffer = NULL;
3046 max = XML_MAX_NAMELEN;
3047 }
3048
3049
3050 if (c == ':') {
3051 c = *cur;
3052 prefix = ret;
3053 if (c == 0) {
3054 ret = xmlStrndup(BAD_CAST "", 0);
3055 if (ret == NULL) {
3056 xmlFree(prefix);
3057 return(NULL);
3058 }
3059 *prefixOut = prefix;
3060 return(ret);
3061 }
3062 len = 0;
3063
3064 /*
3065 * Check that the first character is proper to start
3066 * a new name
3067 */
3068 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3069 ((c >= 0x41) && (c <= 0x5A)) ||
3070 (c == '_') || (c == ':'))) {
3071 int l;
3072 int first = CUR_SCHAR(cur, l);
3073
3074 if (!IS_LETTER(first) && (first != '_')) {
3075 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3076 "Name %s is not XML Namespace compliant\n",
3077 name);
3078 }
3079 }
3080 cur++;
3081
3082 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3083 buf[len++] = c;
3084 c = *cur++;
3085 }
3086 if (len >= max) {
3087 /*
3088 * Okay someone managed to make a huge name, so he's ready to pay
3089 * for the processing speed.
3090 */
3091 max = len * 2;
3092
3093 buffer = (xmlChar *) xmlMallocAtomic(max);
3094 if (buffer == NULL) {
3095 xmlErrMemory(ctxt);
3096 xmlFree(prefix);
3097 return(NULL);
3098 }
3099 memcpy(buffer, buf, len);
3100 while (c != 0) { /* tested bigname2.xml */
3101 if (len + 10 > max) {
3102 xmlChar *tmp;
3103
3104 max *= 2;
3105 tmp = (xmlChar *) xmlRealloc(buffer, max);
3106 if (tmp == NULL) {
3107 xmlErrMemory(ctxt);
3108 xmlFree(prefix);
3109 xmlFree(buffer);
3110 return(NULL);
3111 }
3112 buffer = tmp;
3113 }
3114 buffer[len++] = c;
3115 c = *cur++;
3116 }
3117 buffer[len] = 0;
3118 }
3119
3120 if (buffer == NULL) {
3121 ret = xmlStrndup(buf, len);
3122 if (ret == NULL) {
3123 xmlFree(prefix);
3124 return(NULL);
3125 }
3126 } else {
3127 ret = buffer;
3128 }
3129
3130 *prefixOut = prefix;
3131 }
3132
3133 return(ret);
3134 }
3135
3136 /************************************************************************
3137 * *
3138 * The parser itself *
3139 * Relates to http://www.w3.org/TR/REC-xml *
3140 * *
3141 ************************************************************************/
3142
3143 /************************************************************************
3144 * *
3145 * Routines to parse Name, NCName and NmToken *
3146 * *
3147 ************************************************************************/
3148
3149 /*
3150 * The two following functions are related to the change of accepted
3151 * characters for Name and NmToken in the Revision 5 of XML-1.0
3152 * They correspond to the modified production [4] and the new production [4a]
3153 * changes in that revision. Also note that the macros used for the
3154 * productions Letter, Digit, CombiningChar and Extender are not needed
3155 * anymore.
3156 * We still keep compatibility to pre-revision5 parsing semantic if the
3157 * new XML_PARSE_OLD10 option is given to the parser.
3158 */
3159 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3160 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3161 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162 /*
3163 * Use the new checks of production [4] [4a] amd [5] of the
3164 * Update 5 of XML-1.0
3165 */
3166 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3167 (((c >= 'a') && (c <= 'z')) ||
3168 ((c >= 'A') && (c <= 'Z')) ||
3169 (c == '_') || (c == ':') ||
3170 ((c >= 0xC0) && (c <= 0xD6)) ||
3171 ((c >= 0xD8) && (c <= 0xF6)) ||
3172 ((c >= 0xF8) && (c <= 0x2FF)) ||
3173 ((c >= 0x370) && (c <= 0x37D)) ||
3174 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175 ((c >= 0x200C) && (c <= 0x200D)) ||
3176 ((c >= 0x2070) && (c <= 0x218F)) ||
3177 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181 ((c >= 0x10000) && (c <= 0xEFFFF))))
3182 return(1);
3183 } else {
3184 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3185 return(1);
3186 }
3187 return(0);
3188 }
3189
3190 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3191 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3192 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3193 /*
3194 * Use the new checks of production [4] [4a] amd [5] of the
3195 * Update 5 of XML-1.0
3196 */
3197 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3198 (((c >= 'a') && (c <= 'z')) ||
3199 ((c >= 'A') && (c <= 'Z')) ||
3200 ((c >= '0') && (c <= '9')) || /* !start */
3201 (c == '_') || (c == ':') ||
3202 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3203 ((c >= 0xC0) && (c <= 0xD6)) ||
3204 ((c >= 0xD8) && (c <= 0xF6)) ||
3205 ((c >= 0xF8) && (c <= 0x2FF)) ||
3206 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3207 ((c >= 0x370) && (c <= 0x37D)) ||
3208 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3209 ((c >= 0x200C) && (c <= 0x200D)) ||
3210 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3211 ((c >= 0x2070) && (c <= 0x218F)) ||
3212 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3213 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3214 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3215 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3216 ((c >= 0x10000) && (c <= 0xEFFFF))))
3217 return(1);
3218 } else {
3219 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3220 (c == '.') || (c == '-') ||
3221 (c == '_') || (c == ':') ||
3222 (IS_COMBINING(c)) ||
3223 (IS_EXTENDER(c)))
3224 return(1);
3225 }
3226 return(0);
3227 }
3228
3229 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3230 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3231 const xmlChar *ret;
3232 int len = 0, l;
3233 int c;
3234 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3235 XML_MAX_TEXT_LENGTH :
3236 XML_MAX_NAME_LENGTH;
3237
3238 /*
3239 * Handler for more complex cases
3240 */
3241 c = CUR_CHAR(l);
3242 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243 /*
3244 * Use the new checks of production [4] [4a] amd [5] of the
3245 * Update 5 of XML-1.0
3246 */
3247 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248 (!(((c >= 'a') && (c <= 'z')) ||
3249 ((c >= 'A') && (c <= 'Z')) ||
3250 (c == '_') || (c == ':') ||
3251 ((c >= 0xC0) && (c <= 0xD6)) ||
3252 ((c >= 0xD8) && (c <= 0xF6)) ||
3253 ((c >= 0xF8) && (c <= 0x2FF)) ||
3254 ((c >= 0x370) && (c <= 0x37D)) ||
3255 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256 ((c >= 0x200C) && (c <= 0x200D)) ||
3257 ((c >= 0x2070) && (c <= 0x218F)) ||
3258 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263 return(NULL);
3264 }
3265 len += l;
3266 NEXTL(l);
3267 c = CUR_CHAR(l);
3268 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269 (((c >= 'a') && (c <= 'z')) ||
3270 ((c >= 'A') && (c <= 'Z')) ||
3271 ((c >= '0') && (c <= '9')) || /* !start */
3272 (c == '_') || (c == ':') ||
3273 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274 ((c >= 0xC0) && (c <= 0xD6)) ||
3275 ((c >= 0xD8) && (c <= 0xF6)) ||
3276 ((c >= 0xF8) && (c <= 0x2FF)) ||
3277 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278 ((c >= 0x370) && (c <= 0x37D)) ||
3279 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280 ((c >= 0x200C) && (c <= 0x200D)) ||
3281 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282 ((c >= 0x2070) && (c <= 0x218F)) ||
3283 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287 ((c >= 0x10000) && (c <= 0xEFFFF))
3288 )) {
3289 if (len <= INT_MAX - l)
3290 len += l;
3291 NEXTL(l);
3292 c = CUR_CHAR(l);
3293 }
3294 } else {
3295 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296 (!IS_LETTER(c) && (c != '_') &&
3297 (c != ':'))) {
3298 return(NULL);
3299 }
3300 len += l;
3301 NEXTL(l);
3302 c = CUR_CHAR(l);
3303
3304 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306 (c == '.') || (c == '-') ||
3307 (c == '_') || (c == ':') ||
3308 (IS_COMBINING(c)) ||
3309 (IS_EXTENDER(c)))) {
3310 if (len <= INT_MAX - l)
3311 len += l;
3312 NEXTL(l);
3313 c = CUR_CHAR(l);
3314 }
3315 }
3316 if (len > maxLength) {
3317 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318 return(NULL);
3319 }
3320 if (ctxt->input->cur - ctxt->input->base < len) {
3321 /*
3322 * There were a couple of bugs where PERefs lead to to a change
3323 * of the buffer. Check the buffer size to avoid passing an invalid
3324 * pointer to xmlDictLookup.
3325 */
3326 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3327 "unexpected change of input buffer");
3328 return (NULL);
3329 }
3330 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3331 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3332 else
3333 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3334 if (ret == NULL)
3335 xmlErrMemory(ctxt);
3336 return(ret);
3337 }
3338
3339 /**
3340 * xmlParseName:
3341 * @ctxt: an XML parser context
3342 *
3343 * DEPRECATED: Internal function, don't use.
3344 *
3345 * parse an XML name.
3346 *
3347 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3348 * CombiningChar | Extender
3349 *
3350 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351 *
3352 * [6] Names ::= Name (#x20 Name)*
3353 *
3354 * Returns the Name parsed or NULL
3355 */
3356
3357 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3358 xmlParseName(xmlParserCtxtPtr ctxt) {
3359 const xmlChar *in;
3360 const xmlChar *ret;
3361 size_t count = 0;
3362 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3363 XML_MAX_TEXT_LENGTH :
3364 XML_MAX_NAME_LENGTH;
3365
3366 GROW;
3367
3368 /*
3369 * Accelerator for simple ASCII names
3370 */
3371 in = ctxt->input->cur;
3372 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 ((*in >= 0x41) && (*in <= 0x5A)) ||
3374 (*in == '_') || (*in == ':')) {
3375 in++;
3376 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 ((*in >= 0x30) && (*in <= 0x39)) ||
3379 (*in == '_') || (*in == '-') ||
3380 (*in == ':') || (*in == '.'))
3381 in++;
3382 if ((*in > 0) && (*in < 0x80)) {
3383 count = in - ctxt->input->cur;
3384 if (count > maxLength) {
3385 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386 return(NULL);
3387 }
3388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389 ctxt->input->cur = in;
3390 ctxt->input->col += count;
3391 if (ret == NULL)
3392 xmlErrMemory(ctxt);
3393 return(ret);
3394 }
3395 }
3396 /* accelerator for special cases */
3397 return(xmlParseNameComplex(ctxt));
3398 }
3399
3400 static xmlHashedString
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3401 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402 xmlHashedString ret;
3403 int len = 0, l;
3404 int c;
3405 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406 XML_MAX_TEXT_LENGTH :
3407 XML_MAX_NAME_LENGTH;
3408 size_t startPosition = 0;
3409
3410 ret.name = NULL;
3411 ret.hashValue = 0;
3412
3413 /*
3414 * Handler for more complex cases
3415 */
3416 startPosition = CUR_PTR - BASE_PTR;
3417 c = CUR_CHAR(l);
3418 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420 return(ret);
3421 }
3422
3423 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425 if (len <= INT_MAX - l)
3426 len += l;
3427 NEXTL(l);
3428 c = CUR_CHAR(l);
3429 }
3430 if (len > maxLength) {
3431 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432 return(ret);
3433 }
3434 ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3435 if (ret.name == NULL)
3436 xmlErrMemory(ctxt);
3437 return(ret);
3438 }
3439
3440 /**
3441 * xmlParseNCName:
3442 * @ctxt: an XML parser context
3443 * @len: length of the string parsed
3444 *
3445 * parse an XML name.
3446 *
3447 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3448 * CombiningChar | Extender
3449 *
3450 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3451 *
3452 * Returns the Name parsed or NULL
3453 */
3454
3455 static xmlHashedString
xmlParseNCName(xmlParserCtxtPtr ctxt)3456 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3457 const xmlChar *in, *e;
3458 xmlHashedString ret;
3459 size_t count = 0;
3460 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3461 XML_MAX_TEXT_LENGTH :
3462 XML_MAX_NAME_LENGTH;
3463
3464 ret.name = NULL;
3465
3466 /*
3467 * Accelerator for simple ASCII names
3468 */
3469 in = ctxt->input->cur;
3470 e = ctxt->input->end;
3471 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472 ((*in >= 0x41) && (*in <= 0x5A)) ||
3473 (*in == '_')) && (in < e)) {
3474 in++;
3475 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476 ((*in >= 0x41) && (*in <= 0x5A)) ||
3477 ((*in >= 0x30) && (*in <= 0x39)) ||
3478 (*in == '_') || (*in == '-') ||
3479 (*in == '.')) && (in < e))
3480 in++;
3481 if (in >= e)
3482 goto complex;
3483 if ((*in > 0) && (*in < 0x80)) {
3484 count = in - ctxt->input->cur;
3485 if (count > maxLength) {
3486 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3487 return(ret);
3488 }
3489 ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3490 ctxt->input->cur = in;
3491 ctxt->input->col += count;
3492 if (ret.name == NULL) {
3493 xmlErrMemory(ctxt);
3494 }
3495 return(ret);
3496 }
3497 }
3498 complex:
3499 return(xmlParseNCNameComplex(ctxt));
3500 }
3501
3502 /**
3503 * xmlParseNameAndCompare:
3504 * @ctxt: an XML parser context
3505 *
3506 * parse an XML name and compares for match
3507 * (specialized for endtag parsing)
3508 *
3509 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3510 * and the name for mismatch
3511 */
3512
3513 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3514 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3515 register const xmlChar *cmp = other;
3516 register const xmlChar *in;
3517 const xmlChar *ret;
3518
3519 GROW;
3520
3521 in = ctxt->input->cur;
3522 while (*in != 0 && *in == *cmp) {
3523 ++in;
3524 ++cmp;
3525 }
3526 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3527 /* success */
3528 ctxt->input->col += in - ctxt->input->cur;
3529 ctxt->input->cur = in;
3530 return (const xmlChar*) 1;
3531 }
3532 /* failure (or end of input buffer), check with full function */
3533 ret = xmlParseName (ctxt);
3534 /* strings coming from the dictionary direct compare possible */
3535 if (ret == other) {
3536 return (const xmlChar*) 1;
3537 }
3538 return ret;
3539 }
3540
3541 /**
3542 * xmlParseStringName:
3543 * @ctxt: an XML parser context
3544 * @str: a pointer to the string pointer (IN/OUT)
3545 *
3546 * parse an XML name.
3547 *
3548 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3549 * CombiningChar | Extender
3550 *
3551 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3552 *
3553 * [6] Names ::= Name (#x20 Name)*
3554 *
3555 * Returns the Name parsed or NULL. The @str pointer
3556 * is updated to the current location in the string.
3557 */
3558
3559 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3560 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3561 xmlChar buf[XML_MAX_NAMELEN + 5];
3562 xmlChar *ret;
3563 const xmlChar *cur = *str;
3564 int len = 0, l;
3565 int c;
3566 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3567 XML_MAX_TEXT_LENGTH :
3568 XML_MAX_NAME_LENGTH;
3569
3570 c = CUR_SCHAR(cur, l);
3571 if (!xmlIsNameStartChar(ctxt, c)) {
3572 return(NULL);
3573 }
3574
3575 COPY_BUF(buf, len, c);
3576 cur += l;
3577 c = CUR_SCHAR(cur, l);
3578 while (xmlIsNameChar(ctxt, c)) {
3579 COPY_BUF(buf, len, c);
3580 cur += l;
3581 c = CUR_SCHAR(cur, l);
3582 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583 /*
3584 * Okay someone managed to make a huge name, so he's ready to pay
3585 * for the processing speed.
3586 */
3587 xmlChar *buffer;
3588 int max = len * 2;
3589
3590 buffer = (xmlChar *) xmlMallocAtomic(max);
3591 if (buffer == NULL) {
3592 xmlErrMemory(ctxt);
3593 return(NULL);
3594 }
3595 memcpy(buffer, buf, len);
3596 while (xmlIsNameChar(ctxt, c)) {
3597 if (len + 10 > max) {
3598 xmlChar *tmp;
3599
3600 max *= 2;
3601 tmp = (xmlChar *) xmlRealloc(buffer, max);
3602 if (tmp == NULL) {
3603 xmlErrMemory(ctxt);
3604 xmlFree(buffer);
3605 return(NULL);
3606 }
3607 buffer = tmp;
3608 }
3609 COPY_BUF(buffer, len, c);
3610 cur += l;
3611 c = CUR_SCHAR(cur, l);
3612 if (len > maxLength) {
3613 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614 xmlFree(buffer);
3615 return(NULL);
3616 }
3617 }
3618 buffer[len] = 0;
3619 *str = cur;
3620 return(buffer);
3621 }
3622 }
3623 if (len > maxLength) {
3624 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3625 return(NULL);
3626 }
3627 *str = cur;
3628 ret = xmlStrndup(buf, len);
3629 if (ret == NULL)
3630 xmlErrMemory(ctxt);
3631 return(ret);
3632 }
3633
3634 /**
3635 * xmlParseNmtoken:
3636 * @ctxt: an XML parser context
3637 *
3638 * DEPRECATED: Internal function, don't use.
3639 *
3640 * parse an XML Nmtoken.
3641 *
3642 * [7] Nmtoken ::= (NameChar)+
3643 *
3644 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3645 *
3646 * Returns the Nmtoken parsed or NULL
3647 */
3648
3649 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3650 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3651 xmlChar buf[XML_MAX_NAMELEN + 5];
3652 xmlChar *ret;
3653 int len = 0, l;
3654 int c;
3655 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3656 XML_MAX_TEXT_LENGTH :
3657 XML_MAX_NAME_LENGTH;
3658
3659 c = CUR_CHAR(l);
3660
3661 while (xmlIsNameChar(ctxt, c)) {
3662 COPY_BUF(buf, len, c);
3663 NEXTL(l);
3664 c = CUR_CHAR(l);
3665 if (len >= XML_MAX_NAMELEN) {
3666 /*
3667 * Okay someone managed to make a huge token, so he's ready to pay
3668 * for the processing speed.
3669 */
3670 xmlChar *buffer;
3671 int max = len * 2;
3672
3673 buffer = (xmlChar *) xmlMallocAtomic(max);
3674 if (buffer == NULL) {
3675 xmlErrMemory(ctxt);
3676 return(NULL);
3677 }
3678 memcpy(buffer, buf, len);
3679 while (xmlIsNameChar(ctxt, c)) {
3680 if (len + 10 > max) {
3681 xmlChar *tmp;
3682
3683 max *= 2;
3684 tmp = (xmlChar *) xmlRealloc(buffer, max);
3685 if (tmp == NULL) {
3686 xmlErrMemory(ctxt);
3687 xmlFree(buffer);
3688 return(NULL);
3689 }
3690 buffer = tmp;
3691 }
3692 COPY_BUF(buffer, len, c);
3693 if (len > maxLength) {
3694 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695 xmlFree(buffer);
3696 return(NULL);
3697 }
3698 NEXTL(l);
3699 c = CUR_CHAR(l);
3700 }
3701 buffer[len] = 0;
3702 return(buffer);
3703 }
3704 }
3705 if (len == 0)
3706 return(NULL);
3707 if (len > maxLength) {
3708 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709 return(NULL);
3710 }
3711 ret = xmlStrndup(buf, len);
3712 if (ret == NULL)
3713 xmlErrMemory(ctxt);
3714 return(ret);
3715 }
3716
3717 /**
3718 * xmlExpandPEsInEntityValue:
3719 * @ctxt: parser context
3720 * @buf: string buffer
3721 * @str: entity value
3722 * @length: size of entity value
3723 * @depth: nesting depth
3724 *
3725 * Validate an entity value and expand parameter entities.
3726 */
3727 static void
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,int length,int depth)3728 xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3729 const xmlChar *str, int length, int depth) {
3730 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3731 const xmlChar *end, *chunk;
3732 int c, l;
3733
3734 if (str == NULL)
3735 return;
3736
3737 depth += 1;
3738 if (depth > maxDepth) {
3739 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3740 "Maximum entity nesting depth exceeded");
3741 return;
3742 }
3743
3744 end = str + length;
3745 chunk = str;
3746
3747 while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3748 c = *str;
3749
3750 if (c >= 0x80) {
3751 l = xmlUTF8MultibyteLen(ctxt, str,
3752 "invalid character in entity value\n");
3753 if (l == 0) {
3754 if (chunk < str)
3755 xmlSBufAddString(buf, chunk, str - chunk);
3756 xmlSBufAddReplChar(buf);
3757 str += 1;
3758 chunk = str;
3759 } else {
3760 str += l;
3761 }
3762 } else if (c == '&') {
3763 if (str[1] == '#') {
3764 if (chunk < str)
3765 xmlSBufAddString(buf, chunk, str - chunk);
3766
3767 c = xmlParseStringCharRef(ctxt, &str);
3768 if (c == 0)
3769 return;
3770
3771 xmlSBufAddChar(buf, c);
3772
3773 chunk = str;
3774 } else {
3775 xmlChar *name;
3776
3777 /*
3778 * General entity references are checked for
3779 * syntactic validity.
3780 */
3781 str++;
3782 name = xmlParseStringName(ctxt, &str);
3783
3784 if ((name == NULL) || (*str++ != ';')) {
3785 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3786 "EntityValue: '&' forbidden except for entities "
3787 "references\n");
3788 xmlFree(name);
3789 return;
3790 }
3791
3792 xmlFree(name);
3793 }
3794 } else if (c == '%') {
3795 xmlEntityPtr ent;
3796
3797 if (chunk < str)
3798 xmlSBufAddString(buf, chunk, str - chunk);
3799
3800 ent = xmlParseStringPEReference(ctxt, &str);
3801 if (ent == NULL)
3802 return;
3803
3804 if (!PARSER_EXTERNAL(ctxt)) {
3805 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3806 return;
3807 }
3808
3809 if (ent->content == NULL) {
3810 /*
3811 * Note: external parsed entities will not be loaded,
3812 * it is not required for a non-validating parser to
3813 * complete external PEReferences coming from the
3814 * internal subset
3815 */
3816 if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3817 ((ctxt->replaceEntities) ||
3818 (ctxt->validate))) {
3819 xmlLoadEntityContent(ctxt, ent);
3820 } else {
3821 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3822 "not validating will not read content for "
3823 "PE entity %s\n", ent->name, NULL);
3824 }
3825 }
3826
3827 /*
3828 * TODO: Skip if ent->content is still NULL.
3829 */
3830
3831 if (xmlParserEntityCheck(ctxt, ent->length))
3832 return;
3833
3834 if (ent->flags & XML_ENT_EXPANDING) {
3835 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3836 xmlHaltParser(ctxt);
3837 return;
3838 }
3839
3840 ent->flags |= XML_ENT_EXPANDING;
3841 xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3842 depth);
3843 ent->flags &= ~XML_ENT_EXPANDING;
3844
3845 chunk = str;
3846 } else {
3847 /* Normal ASCII char */
3848 if (!IS_BYTE_CHAR(c)) {
3849 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3850 "invalid character in entity value\n");
3851 if (chunk < str)
3852 xmlSBufAddString(buf, chunk, str - chunk);
3853 xmlSBufAddReplChar(buf);
3854 str += 1;
3855 chunk = str;
3856 } else {
3857 str += 1;
3858 }
3859 }
3860 }
3861
3862 if (chunk < str)
3863 xmlSBufAddString(buf, chunk, str - chunk);
3864
3865 return;
3866 }
3867
3868 /**
3869 * xmlParseEntityValue:
3870 * @ctxt: an XML parser context
3871 * @orig: if non-NULL store a copy of the original entity value
3872 *
3873 * DEPRECATED: Internal function, don't use.
3874 *
3875 * parse a value for ENTITY declarations
3876 *
3877 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3878 * "'" ([^%&'] | PEReference | Reference)* "'"
3879 *
3880 * Returns the EntityValue parsed with reference substituted or NULL
3881 */
3882 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3883 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3884 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3885 XML_MAX_HUGE_LENGTH :
3886 XML_MAX_TEXT_LENGTH;
3887 xmlSBuf buf;
3888 const xmlChar *start;
3889 int quote, length;
3890
3891 xmlSBufInit(&buf, maxLength);
3892
3893 GROW;
3894
3895 quote = CUR;
3896 if ((quote != '"') && (quote != '\'')) {
3897 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898 return(NULL);
3899 }
3900 CUR_PTR++;
3901
3902 length = 0;
3903
3904 /*
3905 * Copy raw content of the entity into a buffer
3906 */
3907 while (1) {
3908 int c;
3909
3910 if (PARSER_STOPPED(ctxt))
3911 goto error;
3912
3913 if (CUR_PTR >= ctxt->input->end) {
3914 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3915 goto error;
3916 }
3917
3918 c = CUR;
3919
3920 if (c == 0) {
3921 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3922 "invalid character in entity value\n");
3923 goto error;
3924 }
3925 if (c == quote)
3926 break;
3927 NEXTL(1);
3928 length += 1;
3929
3930 /*
3931 * TODO: Check growth threshold
3932 */
3933 if (ctxt->input->end - CUR_PTR < 10)
3934 GROW;
3935 }
3936
3937 start = CUR_PTR - length;
3938
3939 if (orig != NULL) {
3940 *orig = xmlStrndup(start, length);
3941 if (*orig == NULL)
3942 xmlErrMemory(ctxt);
3943 }
3944
3945 xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3946
3947 NEXTL(1);
3948
3949 return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3950
3951 error:
3952 xmlSBufCleanup(&buf, ctxt, "entity length too long");
3953 return(NULL);
3954 }
3955
3956 /**
3957 * xmlCheckEntityInAttValue:
3958 * @ctxt: parser context
3959 * @pent: entity
3960 * @depth: nesting depth
3961 *
3962 * Check an entity reference in an attribute value for validity
3963 * without expanding it.
3964 */
3965 static void
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt,xmlEntityPtr pent,int depth)3966 xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3967 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3968 const xmlChar *str;
3969 unsigned long expandedSize = pent->length;
3970 int c, flags;
3971
3972 depth += 1;
3973 if (depth > maxDepth) {
3974 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3975 "Maximum entity nesting depth exceeded");
3976 return;
3977 }
3978
3979 if (pent->flags & XML_ENT_EXPANDING) {
3980 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3981 xmlHaltParser(ctxt);
3982 return;
3983 }
3984
3985 /*
3986 * If we're parsing a default attribute value in DTD content,
3987 * the entity might reference other entities which weren't
3988 * defined yet, so the check isn't reliable.
3989 */
3990 if (ctxt->inSubset == 0)
3991 flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3992 else
3993 flags = XML_ENT_VALIDATED;
3994
3995 str = pent->content;
3996 if (str == NULL)
3997 goto done;
3998
3999 /*
4000 * Note that entity values are already validated. We only check
4001 * for illegal less-than signs and compute the expanded size
4002 * of the entity. No special handling for multi-byte characters
4003 * is needed.
4004 */
4005 while (!PARSER_STOPPED(ctxt)) {
4006 c = *str;
4007
4008 if (c != '&') {
4009 if (c == 0)
4010 break;
4011
4012 if (c == '<')
4013 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4014 "'<' in entity '%s' is not allowed in attributes "
4015 "values\n", pent->name);
4016
4017 str += 1;
4018 } else if (str[1] == '#') {
4019 int val;
4020
4021 val = xmlParseStringCharRef(ctxt, &str);
4022 if (val == 0) {
4023 pent->content[0] = 0;
4024 break;
4025 }
4026 } else {
4027 xmlChar *name;
4028 xmlEntityPtr ent;
4029
4030 name = xmlParseStringEntityRef(ctxt, &str);
4031 if (name == NULL) {
4032 pent->content[0] = 0;
4033 break;
4034 }
4035
4036 ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4037 xmlFree(name);
4038
4039 if ((ent != NULL) &&
4040 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4041 if ((ent->flags & flags) != flags) {
4042 pent->flags |= XML_ENT_EXPANDING;
4043 xmlCheckEntityInAttValue(ctxt, ent, depth);
4044 pent->flags &= ~XML_ENT_EXPANDING;
4045 }
4046
4047 xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4048 xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4049 }
4050 }
4051 }
4052
4053 done:
4054 if (ctxt->inSubset == 0)
4055 pent->expandedSize = expandedSize;
4056
4057 pent->flags |= flags;
4058 }
4059
4060 /**
4061 * xmlExpandEntityInAttValue:
4062 * @ctxt: parser context
4063 * @buf: string buffer
4064 * @str: entity or attribute value
4065 * @pent: entity for entity value, NULL for attribute values
4066 * @normalize: whether to collapse whitespace
4067 * @inSpace: whitespace state
4068 * @depth: nesting depth
4069 * @check: whether to check for amplification
4070 *
4071 * Expand general entity references in an entity or attribute value.
4072 * Perform attribute value normalization.
4073 */
4074 static void
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,xmlEntityPtr pent,int normalize,int * inSpace,int depth,int check)4075 xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4076 const xmlChar *str, xmlEntityPtr pent, int normalize,
4077 int *inSpace, int depth, int check) {
4078 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4079 int c, chunkSize;
4080
4081 if (str == NULL)
4082 return;
4083
4084 depth += 1;
4085 if (depth > maxDepth) {
4086 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4087 "Maximum entity nesting depth exceeded");
4088 return;
4089 }
4090
4091 if (pent != NULL) {
4092 if (pent->flags & XML_ENT_EXPANDING) {
4093 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4094 xmlHaltParser(ctxt);
4095 return;
4096 }
4097
4098 if (check) {
4099 if (xmlParserEntityCheck(ctxt, pent->length))
4100 return;
4101 }
4102 }
4103
4104 chunkSize = 0;
4105
4106 /*
4107 * Note that entity values are already validated. No special
4108 * handling for multi-byte characters is needed.
4109 */
4110 while (!PARSER_STOPPED(ctxt)) {
4111 c = *str;
4112
4113 if (c != '&') {
4114 if (c == 0)
4115 break;
4116
4117 /*
4118 * If this function is called without an entity, it is used to
4119 * expand entities in an attribute content where less-than was
4120 * already unscaped and is allowed.
4121 */
4122 if ((pent != NULL) && (c == '<')) {
4123 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4124 "'<' in entity '%s' is not allowed in attributes "
4125 "values\n", pent->name);
4126 break;
4127 }
4128
4129 if (c <= 0x20) {
4130 if ((normalize) && (*inSpace)) {
4131 /* Skip char */
4132 if (chunkSize > 0) {
4133 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4134 chunkSize = 0;
4135 }
4136 } else if (c < 0x20) {
4137 if (chunkSize > 0) {
4138 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4139 chunkSize = 0;
4140 }
4141
4142 xmlSBufAddCString(buf, " ", 1);
4143 } else {
4144 chunkSize += 1;
4145 }
4146
4147 *inSpace = 1;
4148 } else {
4149 chunkSize += 1;
4150 *inSpace = 0;
4151 }
4152
4153 str += 1;
4154 } else if (str[1] == '#') {
4155 int val;
4156
4157 if (chunkSize > 0) {
4158 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4159 chunkSize = 0;
4160 }
4161
4162 val = xmlParseStringCharRef(ctxt, &str);
4163 if (val == 0) {
4164 if (pent != NULL)
4165 pent->content[0] = 0;
4166 break;
4167 }
4168
4169 if (val == ' ') {
4170 if ((!normalize) || (!*inSpace))
4171 xmlSBufAddCString(buf, " ", 1);
4172 *inSpace = 1;
4173 } else {
4174 xmlSBufAddChar(buf, val);
4175 *inSpace = 0;
4176 }
4177 } else {
4178 xmlChar *name;
4179 xmlEntityPtr ent;
4180
4181 if (chunkSize > 0) {
4182 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4183 chunkSize = 0;
4184 }
4185
4186 name = xmlParseStringEntityRef(ctxt, &str);
4187 if (name == NULL) {
4188 if (pent != NULL)
4189 pent->content[0] = 0;
4190 break;
4191 }
4192
4193 ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4194 xmlFree(name);
4195
4196 if ((ent != NULL) &&
4197 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4198 if (ent->content == NULL) {
4199 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4200 "predefined entity has no content\n");
4201 break;
4202 }
4203
4204 xmlSBufAddString(buf, ent->content, ent->length);
4205
4206 *inSpace = 0;
4207 } else if ((ent != NULL) && (ent->content != NULL)) {
4208 if (pent != NULL)
4209 pent->flags |= XML_ENT_EXPANDING;
4210 xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4211 normalize, inSpace, depth, check);
4212 if (pent != NULL)
4213 pent->flags &= ~XML_ENT_EXPANDING;
4214 }
4215 }
4216 }
4217
4218 if (chunkSize > 0)
4219 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4220
4221 return;
4222 }
4223
4224 /**
4225 * xmlExpandEntitiesInAttValue:
4226 * @ctxt: parser context
4227 * @str: entity or attribute value
4228 * @normalize: whether to collapse whitespace
4229 *
4230 * Expand general entity references in an entity or attribute value.
4231 * Perform attribute value normalization.
4232 *
4233 * Returns the expanded attribtue value.
4234 */
4235 xmlChar *
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt,const xmlChar * str,int normalize)4236 xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4237 int normalize) {
4238 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4239 XML_MAX_HUGE_LENGTH :
4240 XML_MAX_TEXT_LENGTH;
4241 xmlSBuf buf;
4242 int inSpace = 1;
4243
4244 xmlSBufInit(&buf, maxLength);
4245
4246 xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4247 ctxt->inputNr, /* check */ 0);
4248
4249 if ((normalize) && (inSpace) && (buf.size > 0))
4250 buf.size--;
4251
4252 return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4253 }
4254
4255 /**
4256 * xmlParseAttValueInternal:
4257 * @ctxt: an XML parser context
4258 * @len: attribute len result
4259 * @alloc: whether the attribute was reallocated as a new string
4260 * @normalize: if 1 then further non-CDATA normalization must be done
4261 *
4262 * parse a value for an attribute.
4263 * NOTE: if no normalization is needed, the routine will return pointers
4264 * directly from the data buffer.
4265 *
4266 * 3.3.3 Attribute-Value Normalization:
4267 * Before the value of an attribute is passed to the application or
4268 * checked for validity, the XML processor must normalize it as follows:
4269 * - a character reference is processed by appending the referenced
4270 * character to the attribute value
4271 * - an entity reference is processed by recursively processing the
4272 * replacement text of the entity
4273 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274 * appending #x20 to the normalized value, except that only a single
4275 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4276 * parsed entity or the literal entity value of an internal parsed entity
4277 * - other characters are processed by appending them to the normalized value
4278 * If the declared value is not CDATA, then the XML processor must further
4279 * process the normalized attribute value by discarding any leading and
4280 * trailing space (#x20) characters, and by replacing sequences of space
4281 * (#x20) characters by a single space (#x20) character.
4282 * All attributes for which no declaration has been read should be treated
4283 * by a non-validating parser as if declared CDATA.
4284 *
4285 * Returns the AttValue parsed or NULL. The value has to be freed by the
4286 * caller if it was copied, this can be detected by val[*len] == 0.
4287 */
4288 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * attlen,int * alloc,int normalize)4289 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4290 int normalize) {
4291 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4292 XML_MAX_HUGE_LENGTH :
4293 XML_MAX_TEXT_LENGTH;
4294 xmlSBuf buf;
4295 xmlChar *ret;
4296 int c, l, quote, flags, chunkSize;
4297 int inSpace = 1;
4298
4299 xmlSBufInit(&buf, maxLength);
4300
4301 GROW;
4302
4303 quote = CUR;
4304 if ((quote != '"') && (quote != '\'')) {
4305 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306 return(NULL);
4307 }
4308 NEXTL(1);
4309
4310 if (ctxt->inSubset == 0)
4311 flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312 else
4313 flags = XML_ENT_VALIDATED;
4314
4315 inSpace = 1;
4316 chunkSize = 0;
4317
4318 while (1) {
4319 if (PARSER_STOPPED(ctxt))
4320 goto error;
4321
4322 if (CUR_PTR >= ctxt->input->end) {
4323 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324 "AttValue: ' expected\n");
4325 goto error;
4326 }
4327
4328 /*
4329 * TODO: Check growth threshold
4330 */
4331 if (ctxt->input->end - CUR_PTR < 10)
4332 GROW;
4333
4334 c = CUR;
4335
4336 if (c >= 0x80) {
4337 l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338 "invalid character in attribute value\n");
4339 if (l == 0) {
4340 if (chunkSize > 0) {
4341 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342 chunkSize = 0;
4343 }
4344 xmlSBufAddReplChar(&buf);
4345 NEXTL(1);
4346 } else {
4347 chunkSize += l;
4348 NEXTL(l);
4349 }
4350
4351 inSpace = 0;
4352 } else if (c != '&') {
4353 if (c > 0x20) {
4354 if (c == quote)
4355 break;
4356
4357 if (c == '<')
4358 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359
4360 chunkSize += 1;
4361 inSpace = 0;
4362 } else if (!IS_BYTE_CHAR(c)) {
4363 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364 "invalid character in attribute value\n");
4365 if (chunkSize > 0) {
4366 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367 chunkSize = 0;
4368 }
4369 xmlSBufAddReplChar(&buf);
4370 inSpace = 0;
4371 } else {
4372 /* Whitespace */
4373 if ((normalize) && (inSpace)) {
4374 /* Skip char */
4375 if (chunkSize > 0) {
4376 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377 chunkSize = 0;
4378 }
4379 } else if (c < 0x20) {
4380 /* Convert to space */
4381 if (chunkSize > 0) {
4382 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383 chunkSize = 0;
4384 }
4385
4386 xmlSBufAddCString(&buf, " ", 1);
4387 } else {
4388 chunkSize += 1;
4389 }
4390
4391 inSpace = 1;
4392
4393 if ((c == 0xD) && (NXT(1) == 0xA))
4394 CUR_PTR++;
4395 }
4396
4397 NEXTL(1);
4398 } else if (NXT(1) == '#') {
4399 int val;
4400
4401 if (chunkSize > 0) {
4402 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403 chunkSize = 0;
4404 }
4405
4406 val = xmlParseCharRef(ctxt);
4407 if (val == 0)
4408 goto error;
4409
4410 if ((val == '&') && (!ctxt->replaceEntities)) {
4411 /*
4412 * The reparsing will be done in xmlStringGetNodeList()
4413 * called by the attribute() function in SAX.c
4414 */
4415 xmlSBufAddCString(&buf, "&", 5);
4416 inSpace = 0;
4417 } else if (val == ' ') {
4418 if ((!normalize) || (!inSpace))
4419 xmlSBufAddCString(&buf, " ", 1);
4420 inSpace = 1;
4421 } else {
4422 xmlSBufAddChar(&buf, val);
4423 inSpace = 0;
4424 }
4425 } else {
4426 const xmlChar *name;
4427 xmlEntityPtr ent;
4428
4429 if (chunkSize > 0) {
4430 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431 chunkSize = 0;
4432 }
4433
4434 name = xmlParseEntityRefInternal(ctxt);
4435 if (name == NULL) {
4436 /*
4437 * Probably a literal '&' which wasn't escaped.
4438 * TODO: Handle gracefully in recovery mode.
4439 */
4440 continue;
4441 }
4442
4443 ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444 if (ent == NULL)
4445 continue;
4446
4447 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448 if ((ent->content[0] == '&') && (!ctxt->replaceEntities))
4449 xmlSBufAddCString(&buf, "&", 5);
4450 else
4451 xmlSBufAddString(&buf, ent->content, ent->length);
4452 inSpace = 0;
4453 } else if (ctxt->replaceEntities) {
4454 xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455 normalize, &inSpace, ctxt->inputNr,
4456 /* check */ 1);
4457 } else {
4458 if ((ent->flags & flags) != flags)
4459 xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460
4461 if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462 ent->content[0] = 0;
4463 goto error;
4464 }
4465
4466 /*
4467 * Just output the reference
4468 */
4469 xmlSBufAddCString(&buf, "&", 1);
4470 xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471 xmlSBufAddCString(&buf, ";", 1);
4472
4473 inSpace = 0;
4474 }
4475 }
4476 }
4477
4478 if ((buf.mem == NULL) && (alloc != NULL)) {
4479 ret = (xmlChar *) CUR_PTR - chunkSize;
4480
4481 if (attlen != NULL)
4482 *attlen = chunkSize;
4483 if ((normalize) && (inSpace) && (chunkSize > 0))
4484 *attlen -= 1;
4485 *alloc = 0;
4486
4487 /* Report potential error */
4488 xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489 } else {
4490 if (chunkSize > 0)
4491 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492
4493 if ((normalize) && (inSpace) && (buf.size > 0))
4494 buf.size--;
4495
4496 ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497
4498 if (ret != NULL) {
4499 if (attlen != NULL)
4500 *attlen = buf.size;
4501 if (alloc != NULL)
4502 *alloc = 1;
4503 }
4504 }
4505
4506 NEXTL(1);
4507
4508 return(ret);
4509
4510 error:
4511 xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512 return(NULL);
4513 }
4514
4515 /**
4516 * xmlParseAttValue:
4517 * @ctxt: an XML parser context
4518 *
4519 * DEPRECATED: Internal function, don't use.
4520 *
4521 * parse a value for an attribute
4522 * Note: the parser won't do substitution of entities here, this
4523 * will be handled later in xmlStringGetNodeList
4524 *
4525 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526 * "'" ([^<&'] | Reference)* "'"
4527 *
4528 * 3.3.3 Attribute-Value Normalization:
4529 * Before the value of an attribute is passed to the application or
4530 * checked for validity, the XML processor must normalize it as follows:
4531 * - a character reference is processed by appending the referenced
4532 * character to the attribute value
4533 * - an entity reference is processed by recursively processing the
4534 * replacement text of the entity
4535 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536 * appending #x20 to the normalized value, except that only a single
4537 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4538 * parsed entity or the literal entity value of an internal parsed entity
4539 * - other characters are processed by appending them to the normalized value
4540 * If the declared value is not CDATA, then the XML processor must further
4541 * process the normalized attribute value by discarding any leading and
4542 * trailing space (#x20) characters, and by replacing sequences of space
4543 * (#x20) characters by a single space (#x20) character.
4544 * All attributes for which no declaration has been read should be treated
4545 * by a non-validating parser as if declared CDATA.
4546 *
4547 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548 */
4549
4550
4551 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4552 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4553 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4554 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4555 }
4556
4557 /**
4558 * xmlParseSystemLiteral:
4559 * @ctxt: an XML parser context
4560 *
4561 * DEPRECATED: Internal function, don't use.
4562 *
4563 * parse an XML Literal
4564 *
4565 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566 *
4567 * Returns the SystemLiteral parsed or NULL
4568 */
4569
4570 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4571 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4572 xmlChar *buf = NULL;
4573 int len = 0;
4574 int size = XML_PARSER_BUFFER_SIZE;
4575 int cur, l;
4576 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577 XML_MAX_TEXT_LENGTH :
4578 XML_MAX_NAME_LENGTH;
4579 xmlChar stop;
4580
4581 if (RAW == '"') {
4582 NEXT;
4583 stop = '"';
4584 } else if (RAW == '\'') {
4585 NEXT;
4586 stop = '\'';
4587 } else {
4588 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589 return(NULL);
4590 }
4591
4592 buf = (xmlChar *) xmlMallocAtomic(size);
4593 if (buf == NULL) {
4594 xmlErrMemory(ctxt);
4595 return(NULL);
4596 }
4597 cur = CUR_CHAR(l);
4598 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4599 if (len + 5 >= size) {
4600 xmlChar *tmp;
4601
4602 size *= 2;
4603 tmp = (xmlChar *) xmlRealloc(buf, size);
4604 if (tmp == NULL) {
4605 xmlFree(buf);
4606 xmlErrMemory(ctxt);
4607 return(NULL);
4608 }
4609 buf = tmp;
4610 }
4611 COPY_BUF(buf, len, cur);
4612 if (len > maxLength) {
4613 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614 xmlFree(buf);
4615 return(NULL);
4616 }
4617 NEXTL(l);
4618 cur = CUR_CHAR(l);
4619 }
4620 buf[len] = 0;
4621 if (!IS_CHAR(cur)) {
4622 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623 } else {
4624 NEXT;
4625 }
4626 return(buf);
4627 }
4628
4629 /**
4630 * xmlParsePubidLiteral:
4631 * @ctxt: an XML parser context
4632 *
4633 * DEPRECATED: Internal function, don't use.
4634 *
4635 * parse an XML public literal
4636 *
4637 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638 *
4639 * Returns the PubidLiteral parsed or NULL.
4640 */
4641
4642 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4643 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4644 xmlChar *buf = NULL;
4645 int len = 0;
4646 int size = XML_PARSER_BUFFER_SIZE;
4647 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648 XML_MAX_TEXT_LENGTH :
4649 XML_MAX_NAME_LENGTH;
4650 xmlChar cur;
4651 xmlChar stop;
4652
4653 if (RAW == '"') {
4654 NEXT;
4655 stop = '"';
4656 } else if (RAW == '\'') {
4657 NEXT;
4658 stop = '\'';
4659 } else {
4660 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661 return(NULL);
4662 }
4663 buf = (xmlChar *) xmlMallocAtomic(size);
4664 if (buf == NULL) {
4665 xmlErrMemory(ctxt);
4666 return(NULL);
4667 }
4668 cur = CUR;
4669 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670 (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4671 if (len + 1 >= size) {
4672 xmlChar *tmp;
4673
4674 size *= 2;
4675 tmp = (xmlChar *) xmlRealloc(buf, size);
4676 if (tmp == NULL) {
4677 xmlErrMemory(ctxt);
4678 xmlFree(buf);
4679 return(NULL);
4680 }
4681 buf = tmp;
4682 }
4683 buf[len++] = cur;
4684 if (len > maxLength) {
4685 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686 xmlFree(buf);
4687 return(NULL);
4688 }
4689 NEXT;
4690 cur = CUR;
4691 }
4692 buf[len] = 0;
4693 if (cur != stop) {
4694 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695 } else {
4696 NEXTL(1);
4697 }
4698 return(buf);
4699 }
4700
4701 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702
/*
 * Lookup table used by the inner loop of the character data fast path.
 *
 * A non-zero entry means the byte can be skipped over without further
 * inspection. Entries are zero for the control characters other than
 * 0x09 (so LF and CR are handled separately by the caller), for '&',
 * '<' and ']', and for every non-ASCII byte. The upper half of the
 * table (0x80-0xFF) is left to implicit zero-initialization.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00 /* & */, 0x27,
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00 /* < */, 0x3D, 0x3E, 0x3F,
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00 /* ] */, 0x5E, 0x5F,
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, all zero */
};
4740
4741 /**
4742 * xmlParseCharDataInternal:
4743 * @ctxt: an XML parser context
4744 * @partial: buffer may contain partial UTF-8 sequences
4745 *
4746 * Parse character data. Always makes progress if the first char isn't
4747 * '<' or '&'.
4748 *
4749 * The right angle bracket (>) may be represented using the string ">",
4750 * and must, for compatibility, be escaped using ">" or a character
4751 * reference when it appears in the string "]]>" in content, when that
4752 * string is not marking the end of a CDATA section.
4753 *
4754 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4755 */
4756 static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt,int partial)4757 xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4758 const xmlChar *in;
4759 int nbchar = 0;
4760 int line = ctxt->input->line;
4761 int col = ctxt->input->col;
4762 int ccol;
4763
4764 GROW;
4765 /*
4766 * Accelerated common case where input don't need to be
4767 * modified before passing it to the handler.
4768 */
4769 in = ctxt->input->cur;
4770 do {
4771 get_more_space:
4772 while (*in == 0x20) { in++; ctxt->input->col++; }
4773 if (*in == 0xA) {
4774 do {
4775 ctxt->input->line++; ctxt->input->col = 1;
4776 in++;
4777 } while (*in == 0xA);
4778 goto get_more_space;
4779 }
4780 if (*in == '<') {
4781 nbchar = in - ctxt->input->cur;
4782 if (nbchar > 0) {
4783 const xmlChar *tmp = ctxt->input->cur;
4784 ctxt->input->cur = in;
4785
4786 if ((ctxt->sax != NULL) &&
4787 (ctxt->disableSAX == 0) &&
4788 (ctxt->sax->ignorableWhitespace !=
4789 ctxt->sax->characters)) {
4790 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791 if (ctxt->sax->ignorableWhitespace != NULL)
4792 ctxt->sax->ignorableWhitespace(ctxt->userData,
4793 tmp, nbchar);
4794 } else {
4795 if (ctxt->sax->characters != NULL)
4796 ctxt->sax->characters(ctxt->userData,
4797 tmp, nbchar);
4798 if (*ctxt->space == -1)
4799 *ctxt->space = -2;
4800 }
4801 } else if ((ctxt->sax != NULL) &&
4802 (ctxt->disableSAX == 0) &&
4803 (ctxt->sax->characters != NULL)) {
4804 ctxt->sax->characters(ctxt->userData,
4805 tmp, nbchar);
4806 }
4807 }
4808 return;
4809 }
4810
4811 get_more:
4812 ccol = ctxt->input->col;
4813 while (test_char_data[*in]) {
4814 in++;
4815 ccol++;
4816 }
4817 ctxt->input->col = ccol;
4818 if (*in == 0xA) {
4819 do {
4820 ctxt->input->line++; ctxt->input->col = 1;
4821 in++;
4822 } while (*in == 0xA);
4823 goto get_more;
4824 }
4825 if (*in == ']') {
4826 if ((in[1] == ']') && (in[2] == '>')) {
4827 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828 ctxt->input->cur = in + 1;
4829 return;
4830 }
4831 in++;
4832 ctxt->input->col++;
4833 goto get_more;
4834 }
4835 nbchar = in - ctxt->input->cur;
4836 if (nbchar > 0) {
4837 if ((ctxt->sax != NULL) &&
4838 (ctxt->disableSAX == 0) &&
4839 (ctxt->sax->ignorableWhitespace !=
4840 ctxt->sax->characters) &&
4841 (IS_BLANK_CH(*ctxt->input->cur))) {
4842 const xmlChar *tmp = ctxt->input->cur;
4843 ctxt->input->cur = in;
4844
4845 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846 if (ctxt->sax->ignorableWhitespace != NULL)
4847 ctxt->sax->ignorableWhitespace(ctxt->userData,
4848 tmp, nbchar);
4849 } else {
4850 if (ctxt->sax->characters != NULL)
4851 ctxt->sax->characters(ctxt->userData,
4852 tmp, nbchar);
4853 if (*ctxt->space == -1)
4854 *ctxt->space = -2;
4855 }
4856 line = ctxt->input->line;
4857 col = ctxt->input->col;
4858 } else if ((ctxt->sax != NULL) &&
4859 (ctxt->disableSAX == 0)) {
4860 if (ctxt->sax->characters != NULL)
4861 ctxt->sax->characters(ctxt->userData,
4862 ctxt->input->cur, nbchar);
4863 line = ctxt->input->line;
4864 col = ctxt->input->col;
4865 }
4866 }
4867 ctxt->input->cur = in;
4868 if (*in == 0xD) {
4869 in++;
4870 if (*in == 0xA) {
4871 ctxt->input->cur = in;
4872 in++;
4873 ctxt->input->line++; ctxt->input->col = 1;
4874 continue; /* while */
4875 }
4876 in--;
4877 }
4878 if (*in == '<') {
4879 return;
4880 }
4881 if (*in == '&') {
4882 return;
4883 }
4884 SHRINK;
4885 GROW;
4886 in = ctxt->input->cur;
4887 } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888 (*in == 0x09) || (*in == 0x0a));
4889 ctxt->input->line = line;
4890 ctxt->input->col = col;
4891 xmlParseCharDataComplex(ctxt, partial);
4892 }
4893
4894 /**
4895 * xmlParseCharDataComplex:
4896 * @ctxt: an XML parser context
4897 * @cdata: int indicating whether we are within a CDATA section
4898 *
4899 * Always makes progress if the first char isn't '<' or '&'.
4900 *
4901 * parse a CharData section.this is the fallback function
4902 * of xmlParseCharData() when the parsing requires handling
4903 * of non-ASCII characters.
4904 */
4905 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int partial)4906 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4907 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908 int nbchar = 0;
4909 int cur, l;
4910
4911 cur = CUR_CHAR(l);
4912 while ((cur != '<') && /* checked */
4913 (cur != '&') &&
4914 (IS_CHAR(cur))) {
4915 if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917 }
4918 COPY_BUF(buf, nbchar, cur);
4919 /* move current position before possible calling of ctxt->sax->characters */
4920 NEXTL(l);
4921 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922 buf[nbchar] = 0;
4923
4924 /*
4925 * OK the segment is to be consumed as chars.
4926 */
4927 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928 if (areBlanks(ctxt, buf, nbchar, 0)) {
4929 if (ctxt->sax->ignorableWhitespace != NULL)
4930 ctxt->sax->ignorableWhitespace(ctxt->userData,
4931 buf, nbchar);
4932 } else {
4933 if (ctxt->sax->characters != NULL)
4934 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935 if ((ctxt->sax->characters !=
4936 ctxt->sax->ignorableWhitespace) &&
4937 (*ctxt->space == -1))
4938 *ctxt->space = -2;
4939 }
4940 }
4941 nbchar = 0;
4942 SHRINK;
4943 }
4944 cur = CUR_CHAR(l);
4945 }
4946 if (nbchar != 0) {
4947 buf[nbchar] = 0;
4948 /*
4949 * OK the segment is to be consumed as chars.
4950 */
4951 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952 if (areBlanks(ctxt, buf, nbchar, 0)) {
4953 if (ctxt->sax->ignorableWhitespace != NULL)
4954 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955 } else {
4956 if (ctxt->sax->characters != NULL)
4957 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959 (*ctxt->space == -1))
4960 *ctxt->space = -2;
4961 }
4962 }
4963 }
4964 /*
4965 * cur == 0 can mean
4966 *
4967 * - End of buffer.
4968 * - An actual 0 character.
4969 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970 */
4971 if (ctxt->input->cur < ctxt->input->end) {
4972 if ((cur == 0) && (CUR != 0)) {
4973 if (partial == 0) {
4974 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975 "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976 NEXTL(1);
4977 }
4978 } else if ((cur != '<') && (cur != '&')) {
4979 /* Generate the error and skip the offending character */
4980 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981 "PCDATA invalid Char value %d\n", cur);
4982 NEXTL(l);
4983 }
4984 }
4985 }
4986
4987 /**
4988 * xmlParseCharData:
4989 * @ctxt: an XML parser context
4990 * @cdata: unused
4991 *
4992 * DEPRECATED: Internal function, don't use.
4993 */
4994 void
xmlParseCharData(xmlParserCtxtPtr ctxt,ATTRIBUTE_UNUSED int cdata)4995 xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4996 xmlParseCharDataInternal(ctxt, 0);
4997 }
4998
4999 /**
5000 * xmlParseExternalID:
5001 * @ctxt: an XML parser context
5002 * @publicID: a xmlChar** receiving PubidLiteral
5003 * @strict: indicate whether we should restrict parsing to only
5004 * production [75], see NOTE below
5005 *
5006 * DEPRECATED: Internal function, don't use.
5007 *
5008 * Parse an External ID or a Public ID
5009 *
5010 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011 * 'PUBLIC' S PubidLiteral S SystemLiteral
5012 *
5013 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014 * | 'PUBLIC' S PubidLiteral S SystemLiteral
5015 *
5016 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017 *
5018 * Returns the function returns SystemLiteral and in the second
5019 * case publicID receives PubidLiteral, is strict is off
5020 * it is possible to return NULL and have publicID set.
5021 */
5022
5023 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)5024 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5025 xmlChar *URI = NULL;
5026
5027 *publicID = NULL;
5028 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029 SKIP(6);
5030 if (SKIP_BLANKS == 0) {
5031 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032 "Space required after 'SYSTEM'\n");
5033 }
5034 URI = xmlParseSystemLiteral(ctxt);
5035 if (URI == NULL) {
5036 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037 }
5038 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039 SKIP(6);
5040 if (SKIP_BLANKS == 0) {
5041 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042 "Space required after 'PUBLIC'\n");
5043 }
5044 *publicID = xmlParsePubidLiteral(ctxt);
5045 if (*publicID == NULL) {
5046 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047 }
5048 if (strict) {
5049 /*
5050 * We don't handle [83] so "S SystemLiteral" is required.
5051 */
5052 if (SKIP_BLANKS == 0) {
5053 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054 "Space required after the Public Identifier\n");
5055 }
5056 } else {
5057 /*
5058 * We handle [83] so we return immediately, if
5059 * "S SystemLiteral" is not detected. We skip blanks if no
5060 * system literal was found, but this is harmless since we must
5061 * be at the end of a NotationDecl.
5062 */
5063 if (SKIP_BLANKS == 0) return(NULL);
5064 if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065 }
5066 URI = xmlParseSystemLiteral(ctxt);
5067 if (URI == NULL) {
5068 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069 }
5070 }
5071 return(URI);
5072 }
5073
5074 /**
5075 * xmlParseCommentComplex:
5076 * @ctxt: an XML parser context
5077 * @buf: the already parsed part of the buffer
5078 * @len: number of bytes in the buffer
5079 * @size: allocated size of the buffer
5080 *
5081 * Skip an XML (SGML) comment <!-- .... -->
5082 * The spec says that "For compatibility, the string "--" (double-hyphen)
5083 * must not occur within comments. "
5084 * This is the slow routine in case the accelerator for ascii didn't work
5085 *
5086 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087 */
5088 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)5089 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090 size_t len, size_t size) {
5091 int q, ql;
5092 int r, rl;
5093 int cur, l;
5094 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095 XML_MAX_HUGE_LENGTH :
5096 XML_MAX_TEXT_LENGTH;
5097
5098 if (buf == NULL) {
5099 len = 0;
5100 size = XML_PARSER_BUFFER_SIZE;
5101 buf = (xmlChar *) xmlMallocAtomic(size);
5102 if (buf == NULL) {
5103 xmlErrMemory(ctxt);
5104 return;
5105 }
5106 }
5107 q = CUR_CHAR(ql);
5108 if (q == 0)
5109 goto not_terminated;
5110 if (!IS_CHAR(q)) {
5111 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112 "xmlParseComment: invalid xmlChar value %d\n",
5113 q);
5114 xmlFree (buf);
5115 return;
5116 }
5117 NEXTL(ql);
5118 r = CUR_CHAR(rl);
5119 if (r == 0)
5120 goto not_terminated;
5121 if (!IS_CHAR(r)) {
5122 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123 "xmlParseComment: invalid xmlChar value %d\n",
5124 r);
5125 xmlFree (buf);
5126 return;
5127 }
5128 NEXTL(rl);
5129 cur = CUR_CHAR(l);
5130 if (cur == 0)
5131 goto not_terminated;
5132 while (IS_CHAR(cur) && /* checked */
5133 ((cur != '>') ||
5134 (r != '-') || (q != '-'))) {
5135 if ((r == '-') && (q == '-')) {
5136 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137 }
5138 if (len + 5 >= size) {
5139 xmlChar *new_buf;
5140 size_t new_size;
5141
5142 new_size = size * 2;
5143 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144 if (new_buf == NULL) {
5145 xmlFree (buf);
5146 xmlErrMemory(ctxt);
5147 return;
5148 }
5149 buf = new_buf;
5150 size = new_size;
5151 }
5152 COPY_BUF(buf, len, q);
5153 if (len > maxLength) {
5154 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155 "Comment too big found", NULL);
5156 xmlFree (buf);
5157 return;
5158 }
5159
5160 q = r;
5161 ql = rl;
5162 r = cur;
5163 rl = l;
5164
5165 NEXTL(l);
5166 cur = CUR_CHAR(l);
5167
5168 }
5169 buf[len] = 0;
5170 if (cur == 0) {
5171 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172 "Comment not terminated \n<!--%.50s\n", buf);
5173 } else if (!IS_CHAR(cur)) {
5174 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175 "xmlParseComment: invalid xmlChar value %d\n",
5176 cur);
5177 } else {
5178 NEXT;
5179 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180 (!ctxt->disableSAX))
5181 ctxt->sax->comment(ctxt->userData, buf);
5182 }
5183 xmlFree(buf);
5184 return;
5185 not_terminated:
5186 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187 "Comment not terminated\n", NULL);
5188 xmlFree(buf);
5189 return;
5190 }
5191
5192 /**
5193 * xmlParseComment:
5194 * @ctxt: an XML parser context
5195 *
5196 * DEPRECATED: Internal function, don't use.
5197 *
5198 * Parse an XML (SGML) comment. Always consumes '<!'.
5199 *
5200 * The spec says that "For compatibility, the string "--" (double-hyphen)
5201 * must not occur within comments. "
5202 *
5203 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5204 */
5205 void
xmlParseComment(xmlParserCtxtPtr ctxt)5206 xmlParseComment(xmlParserCtxtPtr ctxt) {
5207 xmlChar *buf = NULL;
5208 size_t size = XML_PARSER_BUFFER_SIZE;
5209 size_t len = 0;
5210 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211 XML_MAX_HUGE_LENGTH :
5212 XML_MAX_TEXT_LENGTH;
5213 const xmlChar *in;
5214 size_t nbchar = 0;
5215 int ccol;
5216
5217 /*
5218 * Check that there is a comment right here.
5219 */
5220 if ((RAW != '<') || (NXT(1) != '!'))
5221 return;
5222 SKIP(2);
5223 if ((RAW != '-') || (NXT(1) != '-'))
5224 return;
5225 SKIP(2);
5226 GROW;
5227
5228 /*
5229 * Accelerated common case where input don't need to be
5230 * modified before passing it to the handler.
5231 */
5232 in = ctxt->input->cur;
5233 do {
5234 if (*in == 0xA) {
5235 do {
5236 ctxt->input->line++; ctxt->input->col = 1;
5237 in++;
5238 } while (*in == 0xA);
5239 }
5240 get_more:
5241 ccol = ctxt->input->col;
5242 while (((*in > '-') && (*in <= 0x7F)) ||
5243 ((*in >= 0x20) && (*in < '-')) ||
5244 (*in == 0x09)) {
5245 in++;
5246 ccol++;
5247 }
5248 ctxt->input->col = ccol;
5249 if (*in == 0xA) {
5250 do {
5251 ctxt->input->line++; ctxt->input->col = 1;
5252 in++;
5253 } while (*in == 0xA);
5254 goto get_more;
5255 }
5256 nbchar = in - ctxt->input->cur;
5257 /*
5258 * save current set of data
5259 */
5260 if (nbchar > 0) {
5261 if (buf == NULL) {
5262 if ((*in == '-') && (in[1] == '-'))
5263 size = nbchar + 1;
5264 else
5265 size = XML_PARSER_BUFFER_SIZE + nbchar;
5266 buf = (xmlChar *) xmlMallocAtomic(size);
5267 if (buf == NULL) {
5268 xmlErrMemory(ctxt);
5269 return;
5270 }
5271 len = 0;
5272 } else if (len + nbchar + 1 >= size) {
5273 xmlChar *new_buf;
5274 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275 new_buf = (xmlChar *) xmlRealloc(buf, size);
5276 if (new_buf == NULL) {
5277 xmlFree (buf);
5278 xmlErrMemory(ctxt);
5279 return;
5280 }
5281 buf = new_buf;
5282 }
5283 memcpy(&buf[len], ctxt->input->cur, nbchar);
5284 len += nbchar;
5285 buf[len] = 0;
5286 }
5287 if (len > maxLength) {
5288 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289 "Comment too big found", NULL);
5290 xmlFree (buf);
5291 return;
5292 }
5293 ctxt->input->cur = in;
5294 if (*in == 0xA) {
5295 in++;
5296 ctxt->input->line++; ctxt->input->col = 1;
5297 }
5298 if (*in == 0xD) {
5299 in++;
5300 if (*in == 0xA) {
5301 ctxt->input->cur = in;
5302 in++;
5303 ctxt->input->line++; ctxt->input->col = 1;
5304 goto get_more;
5305 }
5306 in--;
5307 }
5308 SHRINK;
5309 GROW;
5310 in = ctxt->input->cur;
5311 if (*in == '-') {
5312 if (in[1] == '-') {
5313 if (in[2] == '>') {
5314 SKIP(3);
5315 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316 (!ctxt->disableSAX)) {
5317 if (buf != NULL)
5318 ctxt->sax->comment(ctxt->userData, buf);
5319 else
5320 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321 }
5322 if (buf != NULL)
5323 xmlFree(buf);
5324 return;
5325 }
5326 if (buf != NULL) {
5327 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328 "Double hyphen within comment: "
5329 "<!--%.50s\n",
5330 buf);
5331 } else
5332 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333 "Double hyphen within comment\n", NULL);
5334 in++;
5335 ctxt->input->col++;
5336 }
5337 in++;
5338 ctxt->input->col++;
5339 goto get_more;
5340 }
5341 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5342 xmlParseCommentComplex(ctxt, buf, len, size);
5343 return;
5344 }
5345
5346
5347 /**
5348 * xmlParsePITarget:
5349 * @ctxt: an XML parser context
5350 *
5351 * DEPRECATED: Internal function, don't use.
5352 *
5353 * parse the name of a PI
5354 *
5355 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356 *
5357 * Returns the PITarget name or NULL
5358 */
5359
5360 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5361 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5362 const xmlChar *name;
5363
5364 name = xmlParseName(ctxt);
5365 if ((name != NULL) &&
5366 ((name[0] == 'x') || (name[0] == 'X')) &&
5367 ((name[1] == 'm') || (name[1] == 'M')) &&
5368 ((name[2] == 'l') || (name[2] == 'L'))) {
5369 int i;
5370 if ((name[0] == 'x') && (name[1] == 'm') &&
5371 (name[2] == 'l') && (name[3] == 0)) {
5372 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373 "XML declaration allowed only at the start of the document\n");
5374 return(name);
5375 } else if (name[3] == 0) {
5376 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377 return(name);
5378 }
5379 for (i = 0;;i++) {
5380 if (xmlW3CPIs[i] == NULL) break;
5381 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382 return(name);
5383 }
5384 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385 "xmlParsePITarget: invalid name prefix 'xml'\n",
5386 NULL, NULL);
5387 }
5388 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391 }
5392 return(name);
5393 }
5394
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A malformed value raises the XML_WAR_CATALOG_PI warning, except when
 * the "catalog" keyword is not followed by '=', in which case the
 * function returns silently. A failure to duplicate the URL is reported
 * through xmlErrMemory().
 */
static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    /* Expect: S? 'catalog' S? '=' S? quote URL quote S? */
    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL == NULL) {
        /*
         * The syntax was valid, so the only way to get here is a failed
         * allocation in xmlStrndup. Report it instead of silently
         * dropping the catalog.
         */
        xmlErrMemory(ctxt);
        return;
    }

    /*
     * Unfortunately, the catalog API doesn't report OOM errors.
     * xmlGetLastError isn't very helpful since we don't know
     * where the last error came from. We'd have to reset it
     * before this call and restore it afterwards.
     */
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
    xmlFree(URL);
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5462
5463 /**
5464 * xmlParsePI:
5465 * @ctxt: an XML parser context
5466 *
5467 * DEPRECATED: Internal function, don't use.
5468 *
5469 * parse an XML Processing Instruction.
5470 *
5471 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5472 *
5473 * The processing is transferred to SAX once parsed.
5474 */
5475
5476 void
xmlParsePI(xmlParserCtxtPtr ctxt)5477 xmlParsePI(xmlParserCtxtPtr ctxt) {
5478 xmlChar *buf = NULL;
5479 size_t len = 0;
5480 size_t size = XML_PARSER_BUFFER_SIZE;
5481 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5482 XML_MAX_HUGE_LENGTH :
5483 XML_MAX_TEXT_LENGTH;
5484 int cur, l;
5485 const xmlChar *target;
5486
5487 if ((RAW == '<') && (NXT(1) == '?')) {
5488 /*
5489 * this is a Processing Instruction.
5490 */
5491 SKIP(2);
5492
5493 /*
5494 * Parse the target name and check for special support like
5495 * namespace.
5496 */
5497 target = xmlParsePITarget(ctxt);
5498 if (target != NULL) {
5499 if ((RAW == '?') && (NXT(1) == '>')) {
5500 SKIP(2);
5501
5502 /*
5503 * SAX: PI detected.
5504 */
5505 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5506 (ctxt->sax->processingInstruction != NULL))
5507 ctxt->sax->processingInstruction(ctxt->userData,
5508 target, NULL);
5509 return;
5510 }
5511 buf = (xmlChar *) xmlMallocAtomic(size);
5512 if (buf == NULL) {
5513 xmlErrMemory(ctxt);
5514 return;
5515 }
5516 if (SKIP_BLANKS == 0) {
5517 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5518 "ParsePI: PI %s space expected\n", target);
5519 }
5520 cur = CUR_CHAR(l);
5521 while (IS_CHAR(cur) && /* checked */
5522 ((cur != '?') || (NXT(1) != '>'))) {
5523 if (len + 5 >= size) {
5524 xmlChar *tmp;
5525 size_t new_size = size * 2;
5526 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5527 if (tmp == NULL) {
5528 xmlErrMemory(ctxt);
5529 xmlFree(buf);
5530 return;
5531 }
5532 buf = tmp;
5533 size = new_size;
5534 }
5535 COPY_BUF(buf, len, cur);
5536 if (len > maxLength) {
5537 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5538 "PI %s too big found", target);
5539 xmlFree(buf);
5540 return;
5541 }
5542 NEXTL(l);
5543 cur = CUR_CHAR(l);
5544 }
5545 buf[len] = 0;
5546 if (cur != '?') {
5547 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5548 "ParsePI: PI %s never end ...\n", target);
5549 } else {
5550 SKIP(2);
5551
5552 #ifdef LIBXML_CATALOG_ENABLED
5553 if ((ctxt->inSubset == 0) &&
5554 (xmlStrEqual(target, XML_CATALOG_PI))) {
5555 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5556 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5557 (allow == XML_CATA_ALLOW_ALL))
5558 xmlParseCatalogPI(ctxt, buf);
5559 }
5560 #endif
5561
5562
5563 /*
5564 * SAX: PI detected.
5565 */
5566 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5567 (ctxt->sax->processingInstruction != NULL))
5568 ctxt->sax->processingInstruction(ctxt->userData,
5569 target, buf);
5570 }
5571 xmlFree(buf);
5572 } else {
5573 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5574 }
5575 }
5576 }
5577
5578 /**
5579 * xmlParseNotationDecl:
5580 * @ctxt: an XML parser context
5581 *
5582 * DEPRECATED: Internal function, don't use.
5583 *
5584 * Parse a notation declaration. Always consumes '<!'.
5585 *
5586 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5587 *
5588 * Hence there is actually 3 choices:
5589 * 'PUBLIC' S PubidLiteral
5590 * 'PUBLIC' S PubidLiteral S SystemLiteral
5591 * and 'SYSTEM' S SystemLiteral
5592 *
5593 * See the NOTE on xmlParseExternalID().
5594 */
5595
5596 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5597 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5598 const xmlChar *name;
5599 xmlChar *Pubid;
5600 xmlChar *Systemid;
5601
5602 if ((CUR != '<') || (NXT(1) != '!'))
5603 return;
5604 SKIP(2);
5605
5606 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5607 int inputid = ctxt->input->id;
5608 SKIP(8);
5609 if (SKIP_BLANKS_PE == 0) {
5610 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5611 "Space required after '<!NOTATION'\n");
5612 return;
5613 }
5614
5615 name = xmlParseName(ctxt);
5616 if (name == NULL) {
5617 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5618 return;
5619 }
5620 if (xmlStrchr(name, ':') != NULL) {
5621 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5622 "colons are forbidden from notation names '%s'\n",
5623 name, NULL, NULL);
5624 }
5625 if (SKIP_BLANKS_PE == 0) {
5626 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5627 "Space required after the NOTATION name'\n");
5628 return;
5629 }
5630
5631 /*
5632 * Parse the IDs.
5633 */
5634 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5635 SKIP_BLANKS_PE;
5636
5637 if (RAW == '>') {
5638 if (inputid != ctxt->input->id) {
5639 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5640 "Notation declaration doesn't start and stop"
5641 " in the same entity\n");
5642 }
5643 NEXT;
5644 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5645 (ctxt->sax->notationDecl != NULL))
5646 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5647 } else {
5648 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5649 }
5650 if (Systemid != NULL) xmlFree(Systemid);
5651 if (Pubid != NULL) xmlFree(Pubid);
5652 }
5653 }
5654
5655 /**
5656 * xmlParseEntityDecl:
5657 * @ctxt: an XML parser context
5658 *
5659 * DEPRECATED: Internal function, don't use.
5660 *
5661 * Parse an entity declaration. Always consumes '<!'.
5662 *
5663 * [70] EntityDecl ::= GEDecl | PEDecl
5664 *
5665 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5666 *
5667 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5668 *
5669 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5670 *
5671 * [74] PEDef ::= EntityValue | ExternalID
5672 *
5673 * [76] NDataDecl ::= S 'NDATA' S Name
5674 *
5675 * [ VC: Notation Declared ]
5676 * The Name must match the declared name of a notation.
5677 */
5678
5679 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5680 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5681 const xmlChar *name = NULL;
5682 xmlChar *value = NULL;
5683 xmlChar *URI = NULL, *literal = NULL;
5684 const xmlChar *ndata = NULL;
5685 int isParameter = 0;
5686 xmlChar *orig = NULL;
5687
5688 if ((CUR != '<') || (NXT(1) != '!'))
5689 return;
5690 SKIP(2);
5691
5692 /* GROW; done in the caller */
5693 if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5694 int inputid = ctxt->input->id;
5695 SKIP(6);
5696 if (SKIP_BLANKS_PE == 0) {
5697 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5698 "Space required after '<!ENTITY'\n");
5699 }
5700
5701 if (RAW == '%') {
5702 NEXT;
5703 if (SKIP_BLANKS_PE == 0) {
5704 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5705 "Space required after '%%'\n");
5706 }
5707 isParameter = 1;
5708 }
5709
5710 name = xmlParseName(ctxt);
5711 if (name == NULL) {
5712 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5713 "xmlParseEntityDecl: no name\n");
5714 return;
5715 }
5716 if (xmlStrchr(name, ':') != NULL) {
5717 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5718 "colons are forbidden from entities names '%s'\n",
5719 name, NULL, NULL);
5720 }
5721 if (SKIP_BLANKS_PE == 0) {
5722 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5723 "Space required after the entity name\n");
5724 }
5725
5726 /*
5727 * handle the various case of definitions...
5728 */
5729 if (isParameter) {
5730 if ((RAW == '"') || (RAW == '\'')) {
5731 value = xmlParseEntityValue(ctxt, &orig);
5732 if (value) {
5733 if ((ctxt->sax != NULL) &&
5734 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5735 ctxt->sax->entityDecl(ctxt->userData, name,
5736 XML_INTERNAL_PARAMETER_ENTITY,
5737 NULL, NULL, value);
5738 }
5739 } else {
5740 URI = xmlParseExternalID(ctxt, &literal, 1);
5741 if ((URI == NULL) && (literal == NULL)) {
5742 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5743 }
5744 if (URI) {
5745 xmlURIPtr uri;
5746
5747 if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5748 xmlErrMemory(ctxt);
5749 } else if (uri == NULL) {
5750 /*
5751 * This really ought to be a well formedness error
5752 * but the XML Core WG decided otherwise c.f. issue
5753 * E26 of the XML erratas.
5754 */
5755 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5756 "Invalid URI: %s\n", URI);
5757 } else if (uri->fragment != NULL) {
5758 /*
5759 * Okay this is foolish to block those but not
5760 * invalid URIs.
5761 */
5762 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5763 } else {
5764 if ((ctxt->sax != NULL) &&
5765 (!ctxt->disableSAX) &&
5766 (ctxt->sax->entityDecl != NULL))
5767 ctxt->sax->entityDecl(ctxt->userData, name,
5768 XML_EXTERNAL_PARAMETER_ENTITY,
5769 literal, URI, NULL);
5770 }
5771 xmlFreeURI(uri);
5772 }
5773 }
5774 } else {
5775 if ((RAW == '"') || (RAW == '\'')) {
5776 value = xmlParseEntityValue(ctxt, &orig);
5777 if ((ctxt->sax != NULL) &&
5778 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5779 ctxt->sax->entityDecl(ctxt->userData, name,
5780 XML_INTERNAL_GENERAL_ENTITY,
5781 NULL, NULL, value);
5782 /*
5783 * For expat compatibility in SAX mode.
5784 */
5785 if ((ctxt->myDoc == NULL) ||
5786 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5787 if (ctxt->myDoc == NULL) {
5788 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5789 if (ctxt->myDoc == NULL) {
5790 xmlErrMemory(ctxt);
5791 goto done;
5792 }
5793 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5794 }
5795 if (ctxt->myDoc->intSubset == NULL) {
5796 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5797 BAD_CAST "fake", NULL, NULL);
5798 if (ctxt->myDoc->intSubset == NULL) {
5799 xmlErrMemory(ctxt);
5800 goto done;
5801 }
5802 }
5803
5804 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5805 NULL, NULL, value);
5806 }
5807 } else {
5808 URI = xmlParseExternalID(ctxt, &literal, 1);
5809 if ((URI == NULL) && (literal == NULL)) {
5810 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5811 }
5812 if (URI) {
5813 xmlURIPtr uri;
5814
5815 if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5816 xmlErrMemory(ctxt);
5817 } else if (uri == NULL) {
5818 /*
5819 * This really ought to be a well formedness error
5820 * but the XML Core WG decided otherwise c.f. issue
5821 * E26 of the XML erratas.
5822 */
5823 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5824 "Invalid URI: %s\n", URI);
5825 } else if (uri->fragment != NULL) {
5826 /*
5827 * Okay this is foolish to block those but not
5828 * invalid URIs.
5829 */
5830 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5831 }
5832 xmlFreeURI(uri);
5833 }
5834 if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5835 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5836 "Space required before 'NDATA'\n");
5837 }
5838 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5839 SKIP(5);
5840 if (SKIP_BLANKS_PE == 0) {
5841 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5842 "Space required after 'NDATA'\n");
5843 }
5844 ndata = xmlParseName(ctxt);
5845 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5846 (ctxt->sax->unparsedEntityDecl != NULL))
5847 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5848 literal, URI, ndata);
5849 } else {
5850 if ((ctxt->sax != NULL) &&
5851 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5852 ctxt->sax->entityDecl(ctxt->userData, name,
5853 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5854 literal, URI, NULL);
5855 /*
5856 * For expat compatibility in SAX mode.
5857 * assuming the entity replacement was asked for
5858 */
5859 if ((ctxt->replaceEntities != 0) &&
5860 ((ctxt->myDoc == NULL) ||
5861 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5862 if (ctxt->myDoc == NULL) {
5863 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5864 if (ctxt->myDoc == NULL) {
5865 xmlErrMemory(ctxt);
5866 goto done;
5867 }
5868 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5869 }
5870
5871 if (ctxt->myDoc->intSubset == NULL) {
5872 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5873 BAD_CAST "fake", NULL, NULL);
5874 if (ctxt->myDoc->intSubset == NULL) {
5875 xmlErrMemory(ctxt);
5876 goto done;
5877 }
5878 }
5879 xmlSAX2EntityDecl(ctxt, name,
5880 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5881 literal, URI, NULL);
5882 }
5883 }
5884 }
5885 }
5886 SKIP_BLANKS_PE;
5887 if (RAW != '>') {
5888 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5889 "xmlParseEntityDecl: entity %s not terminated\n", name);
5890 xmlHaltParser(ctxt);
5891 } else {
5892 if (inputid != ctxt->input->id) {
5893 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5894 "Entity declaration doesn't start and stop in"
5895 " the same entity\n");
5896 }
5897 NEXT;
5898 }
5899 if (orig != NULL) {
5900 /*
5901 * Ugly mechanism to save the raw entity value.
5902 */
5903 xmlEntityPtr cur = NULL;
5904
5905 if (isParameter) {
5906 if ((ctxt->sax != NULL) &&
5907 (ctxt->sax->getParameterEntity != NULL))
5908 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5909 } else {
5910 if ((ctxt->sax != NULL) &&
5911 (ctxt->sax->getEntity != NULL))
5912 cur = ctxt->sax->getEntity(ctxt->userData, name);
5913 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5914 cur = xmlSAX2GetEntity(ctxt, name);
5915 }
5916 }
5917 if ((cur != NULL) && (cur->orig == NULL)) {
5918 cur->orig = orig;
5919 orig = NULL;
5920 }
5921 }
5922
5923 done:
5924 if (value != NULL) xmlFree(value);
5925 if (URI != NULL) xmlFree(URI);
5926 if (literal != NULL) xmlFree(literal);
5927 if (orig != NULL) xmlFree(orig);
5928 }
5929 }
5930
5931 /**
5932 * xmlParseDefaultDecl:
5933 * @ctxt: an XML parser context
5934 * @value: Receive a possible fixed default value for the attribute
5935 *
5936 * DEPRECATED: Internal function, don't use.
5937 *
5938 * Parse an attribute default declaration
5939 *
5940 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5941 *
5942 * [ VC: Required Attribute ]
5943 * if the default declaration is the keyword #REQUIRED, then the
5944 * attribute must be specified for all elements of the type in the
5945 * attribute-list declaration.
5946 *
5947 * [ VC: Attribute Default Legal ]
5948 * The declared default value must meet the lexical constraints of
5949 * the declared attribute type c.f. xmlValidateAttributeDecl()
5950 *
5951 * [ VC: Fixed Attribute Default ]
5952 * if an attribute has a default value declared with the #FIXED
5953 * keyword, instances of that attribute must match the default value.
5954 *
5955 * [ WFC: No < in Attribute Values ]
5956 * handled in xmlParseAttValue()
5957 *
5958 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5959 * or XML_ATTRIBUTE_FIXED.
5960 */
5961
5962 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5963 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5964 int val;
5965 xmlChar *ret;
5966
5967 *value = NULL;
5968 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5969 SKIP(9);
5970 return(XML_ATTRIBUTE_REQUIRED);
5971 }
5972 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5973 SKIP(8);
5974 return(XML_ATTRIBUTE_IMPLIED);
5975 }
5976 val = XML_ATTRIBUTE_NONE;
5977 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5978 SKIP(6);
5979 val = XML_ATTRIBUTE_FIXED;
5980 if (SKIP_BLANKS_PE == 0) {
5981 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982 "Space required after '#FIXED'\n");
5983 }
5984 }
5985 ret = xmlParseAttValue(ctxt);
5986 if (ret == NULL) {
5987 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5988 "Attribute default value declaration error\n");
5989 } else
5990 *value = ret;
5991 return(val);
5992 }
5993
5994 /**
5995 * xmlParseNotationType:
5996 * @ctxt: an XML parser context
5997 *
5998 * DEPRECATED: Internal function, don't use.
5999 *
6000 * parse an Notation attribute type.
6001 *
6002 * Note: the leading 'NOTATION' S part has already being parsed...
6003 *
6004 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6005 *
6006 * [ VC: Notation Attributes ]
6007 * Values of this type must match one of the notation names included
6008 * in the declaration; all notation names in the declaration must be declared.
6009 *
6010 * Returns: the notation attribute tree built while parsing
6011 */
6012
6013 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)6014 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
6015 const xmlChar *name;
6016 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6017
6018 if (RAW != '(') {
6019 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6020 return(NULL);
6021 }
6022 do {
6023 NEXT;
6024 SKIP_BLANKS_PE;
6025 name = xmlParseName(ctxt);
6026 if (name == NULL) {
6027 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6028 "Name expected in NOTATION declaration\n");
6029 xmlFreeEnumeration(ret);
6030 return(NULL);
6031 }
6032 tmp = ret;
6033 while (tmp != NULL) {
6034 if (xmlStrEqual(name, tmp->name)) {
6035 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6036 "standalone: attribute notation value token %s duplicated\n",
6037 name, NULL);
6038 if (!xmlDictOwns(ctxt->dict, name))
6039 xmlFree((xmlChar *) name);
6040 break;
6041 }
6042 tmp = tmp->next;
6043 }
6044 if (tmp == NULL) {
6045 cur = xmlCreateEnumeration(name);
6046 if (cur == NULL) {
6047 xmlErrMemory(ctxt);
6048 xmlFreeEnumeration(ret);
6049 return(NULL);
6050 }
6051 if (last == NULL) ret = last = cur;
6052 else {
6053 last->next = cur;
6054 last = cur;
6055 }
6056 }
6057 SKIP_BLANKS_PE;
6058 } while (RAW == '|');
6059 if (RAW != ')') {
6060 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6061 xmlFreeEnumeration(ret);
6062 return(NULL);
6063 }
6064 NEXT;
6065 return(ret);
6066 }
6067
6068 /**
6069 * xmlParseEnumerationType:
6070 * @ctxt: an XML parser context
6071 *
6072 * DEPRECATED: Internal function, don't use.
6073 *
6074 * parse an Enumeration attribute type.
6075 *
6076 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6077 *
6078 * [ VC: Enumeration ]
6079 * Values of this type must match one of the Nmtoken tokens in
6080 * the declaration
6081 *
6082 * Returns: the enumeration attribute tree built while parsing
6083 */
6084
6085 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)6086 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6087 xmlChar *name;
6088 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6089
6090 if (RAW != '(') {
6091 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6092 return(NULL);
6093 }
6094 do {
6095 NEXT;
6096 SKIP_BLANKS_PE;
6097 name = xmlParseNmtoken(ctxt);
6098 if (name == NULL) {
6099 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6100 return(ret);
6101 }
6102 tmp = ret;
6103 while (tmp != NULL) {
6104 if (xmlStrEqual(name, tmp->name)) {
6105 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6106 "standalone: attribute enumeration value token %s duplicated\n",
6107 name, NULL);
6108 if (!xmlDictOwns(ctxt->dict, name))
6109 xmlFree(name);
6110 break;
6111 }
6112 tmp = tmp->next;
6113 }
6114 if (tmp == NULL) {
6115 cur = xmlCreateEnumeration(name);
6116 if (!xmlDictOwns(ctxt->dict, name))
6117 xmlFree(name);
6118 if (cur == NULL) {
6119 xmlErrMemory(ctxt);
6120 xmlFreeEnumeration(ret);
6121 return(NULL);
6122 }
6123 if (last == NULL) ret = last = cur;
6124 else {
6125 last->next = cur;
6126 last = cur;
6127 }
6128 }
6129 SKIP_BLANKS_PE;
6130 } while (RAW == '|');
6131 if (RAW != ')') {
6132 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6133 return(ret);
6134 }
6135 NEXT;
6136 return(ret);
6137 }
6138
6139 /**
6140 * xmlParseEnumeratedType:
6141 * @ctxt: an XML parser context
6142 * @tree: the enumeration tree built while parsing
6143 *
6144 * DEPRECATED: Internal function, don't use.
6145 *
6146 * parse an Enumerated attribute type.
6147 *
6148 * [57] EnumeratedType ::= NotationType | Enumeration
6149 *
6150 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6151 *
6152 *
6153 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6154 */
6155
6156 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6157 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6158 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6159 SKIP(8);
6160 if (SKIP_BLANKS_PE == 0) {
6161 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6162 "Space required after 'NOTATION'\n");
6163 return(0);
6164 }
6165 *tree = xmlParseNotationType(ctxt);
6166 if (*tree == NULL) return(0);
6167 return(XML_ATTRIBUTE_NOTATION);
6168 }
6169 *tree = xmlParseEnumerationType(ctxt);
6170 if (*tree == NULL) return(0);
6171 return(XML_ATTRIBUTE_ENUMERATION);
6172 }
6173
6174 /**
6175 * xmlParseAttributeType:
6176 * @ctxt: an XML parser context
6177 * @tree: the enumeration tree built while parsing
6178 *
6179 * DEPRECATED: Internal function, don't use.
6180 *
6181 * parse the Attribute list def for an element
6182 *
6183 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6184 *
6185 * [55] StringType ::= 'CDATA'
6186 *
6187 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6188 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6189 *
6190 * Validity constraints for attribute values syntax are checked in
6191 * xmlValidateAttributeValue()
6192 *
6193 * [ VC: ID ]
6194 * Values of type ID must match the Name production. A name must not
6195 * appear more than once in an XML document as a value of this type;
6196 * i.e., ID values must uniquely identify the elements which bear them.
6197 *
6198 * [ VC: One ID per Element Type ]
6199 * No element type may have more than one ID attribute specified.
6200 *
6201 * [ VC: ID Attribute Default ]
6202 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6203 *
6204 * [ VC: IDREF ]
6205 * Values of type IDREF must match the Name production, and values
6206 * of type IDREFS must match Names; each IDREF Name must match the value
6207 * of an ID attribute on some element in the XML document; i.e. IDREF
6208 * values must match the value of some ID attribute.
6209 *
6210 * [ VC: Entity Name ]
6211 * Values of type ENTITY must match the Name production, values
6212 * of type ENTITIES must match Names; each Entity Name must match the
6213 * name of an unparsed entity declared in the DTD.
6214 *
6215 * [ VC: Name Token ]
6216 * Values of type NMTOKEN must match the Nmtoken production; values
6217 * of type NMTOKENS must match Nmtokens.
6218 *
6219 * Returns the attribute type
6220 */
6221 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6222 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6223 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6224 SKIP(5);
6225 return(XML_ATTRIBUTE_CDATA);
6226 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6227 SKIP(6);
6228 return(XML_ATTRIBUTE_IDREFS);
6229 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6230 SKIP(5);
6231 return(XML_ATTRIBUTE_IDREF);
6232 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6233 SKIP(2);
6234 return(XML_ATTRIBUTE_ID);
6235 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6236 SKIP(6);
6237 return(XML_ATTRIBUTE_ENTITY);
6238 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6239 SKIP(8);
6240 return(XML_ATTRIBUTE_ENTITIES);
6241 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6242 SKIP(8);
6243 return(XML_ATTRIBUTE_NMTOKENS);
6244 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6245 SKIP(7);
6246 return(XML_ATTRIBUTE_NMTOKEN);
6247 }
6248 return(xmlParseEnumeratedType(ctxt, tree));
6249 }
6250
6251 /**
6252 * xmlParseAttributeListDecl:
6253 * @ctxt: an XML parser context
6254 *
6255 * DEPRECATED: Internal function, don't use.
6256 *
6257 * Parse an attribute list declaration for an element. Always consumes '<!'.
6258 *
6259 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6260 *
6261 * [53] AttDef ::= S Name S AttType S DefaultDecl
6262 *
6263 */
6264 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6265 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6266 const xmlChar *elemName;
6267 const xmlChar *attrName;
6268 xmlEnumerationPtr tree;
6269
6270 if ((CUR != '<') || (NXT(1) != '!'))
6271 return;
6272 SKIP(2);
6273
6274 if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6275 int inputid = ctxt->input->id;
6276
6277 SKIP(7);
6278 if (SKIP_BLANKS_PE == 0) {
6279 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6280 "Space required after '<!ATTLIST'\n");
6281 }
6282 elemName = xmlParseName(ctxt);
6283 if (elemName == NULL) {
6284 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285 "ATTLIST: no name for Element\n");
6286 return;
6287 }
6288 SKIP_BLANKS_PE;
6289 GROW;
6290 while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6291 int type;
6292 int def;
6293 xmlChar *defaultValue = NULL;
6294
6295 GROW;
6296 tree = NULL;
6297 attrName = xmlParseName(ctxt);
6298 if (attrName == NULL) {
6299 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6300 "ATTLIST: no name for Attribute\n");
6301 break;
6302 }
6303 GROW;
6304 if (SKIP_BLANKS_PE == 0) {
6305 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6306 "Space required after the attribute name\n");
6307 break;
6308 }
6309
6310 type = xmlParseAttributeType(ctxt, &tree);
6311 if (type <= 0) {
6312 break;
6313 }
6314
6315 GROW;
6316 if (SKIP_BLANKS_PE == 0) {
6317 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6318 "Space required after the attribute type\n");
6319 if (tree != NULL)
6320 xmlFreeEnumeration(tree);
6321 break;
6322 }
6323
6324 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6325 if (def <= 0) {
6326 if (defaultValue != NULL)
6327 xmlFree(defaultValue);
6328 if (tree != NULL)
6329 xmlFreeEnumeration(tree);
6330 break;
6331 }
6332 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6333 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6334
6335 GROW;
6336 if (RAW != '>') {
6337 if (SKIP_BLANKS_PE == 0) {
6338 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6339 "Space required after the attribute default value\n");
6340 if (defaultValue != NULL)
6341 xmlFree(defaultValue);
6342 if (tree != NULL)
6343 xmlFreeEnumeration(tree);
6344 break;
6345 }
6346 }
6347 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6348 (ctxt->sax->attributeDecl != NULL))
6349 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6350 type, def, defaultValue, tree);
6351 else if (tree != NULL)
6352 xmlFreeEnumeration(tree);
6353
6354 if ((ctxt->sax2) && (defaultValue != NULL) &&
6355 (def != XML_ATTRIBUTE_IMPLIED) &&
6356 (def != XML_ATTRIBUTE_REQUIRED)) {
6357 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6358 }
6359 if (ctxt->sax2) {
6360 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6361 }
6362 if (defaultValue != NULL)
6363 xmlFree(defaultValue);
6364 GROW;
6365 }
6366 if (RAW == '>') {
6367 if (inputid != ctxt->input->id) {
6368 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6369 "Attribute list declaration doesn't start and"
6370 " stop in the same entity\n");
6371 }
6372 NEXT;
6373 }
6374 }
6375 }
6376
6377 /**
6378 * xmlParseElementMixedContentDecl:
6379 * @ctxt: an XML parser context
6380 * @inputchk: the input used for the current entity, needed for boundary checks
6381 *
6382 * DEPRECATED: Internal function, don't use.
6383 *
6384 * parse the declaration for a Mixed Element content
6385 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6386 *
6387 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6388 * '(' S? '#PCDATA' S? ')'
6389 *
6390 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6391 *
6392 * [ VC: No Duplicate Types ]
6393 * The same name must not appear more than once in a single
6394 * mixed-content declaration.
6395 *
6396 * returns: the list of the xmlElementContentPtr describing the element choices
6397 */
6398 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6399 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6400 xmlElementContentPtr ret = NULL, cur = NULL, n;
6401 const xmlChar *elem = NULL;
6402
6403 GROW;
6404 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6405 SKIP(7);
6406 SKIP_BLANKS_PE;
6407 if (RAW == ')') {
6408 if (ctxt->input->id != inputchk) {
6409 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6410 "Element content declaration doesn't start and"
6411 " stop in the same entity\n");
6412 }
6413 NEXT;
6414 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6415 if (ret == NULL)
6416 goto mem_error;
6417 if (RAW == '*') {
6418 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6419 NEXT;
6420 }
6421 return(ret);
6422 }
6423 if ((RAW == '(') || (RAW == '|')) {
6424 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6425 if (ret == NULL)
6426 goto mem_error;
6427 }
6428 while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6429 NEXT;
6430 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6431 if (n == NULL)
6432 goto mem_error;
6433 if (elem == NULL) {
6434 n->c1 = cur;
6435 if (cur != NULL)
6436 cur->parent = n;
6437 ret = cur = n;
6438 } else {
6439 cur->c2 = n;
6440 n->parent = cur;
6441 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6442 if (n->c1 == NULL)
6443 goto mem_error;
6444 n->c1->parent = n;
6445 cur = n;
6446 }
6447 SKIP_BLANKS_PE;
6448 elem = xmlParseName(ctxt);
6449 if (elem == NULL) {
6450 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6451 "xmlParseElementMixedContentDecl : Name expected\n");
6452 xmlFreeDocElementContent(ctxt->myDoc, ret);
6453 return(NULL);
6454 }
6455 SKIP_BLANKS_PE;
6456 GROW;
6457 }
6458 if ((RAW == ')') && (NXT(1) == '*')) {
6459 if (elem != NULL) {
6460 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6461 XML_ELEMENT_CONTENT_ELEMENT);
6462 if (cur->c2 == NULL)
6463 goto mem_error;
6464 cur->c2->parent = cur;
6465 }
6466 if (ret != NULL)
6467 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6468 if (ctxt->input->id != inputchk) {
6469 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6470 "Element content declaration doesn't start and"
6471 " stop in the same entity\n");
6472 }
6473 SKIP(2);
6474 } else {
6475 xmlFreeDocElementContent(ctxt->myDoc, ret);
6476 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6477 return(NULL);
6478 }
6479
6480 } else {
6481 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6482 }
6483 return(ret);
6484
6485 mem_error:
6486 xmlErrMemory(ctxt);
6487 xmlFreeDocElementContent(ctxt->myDoc, ret);
6488 return(NULL);
6489 }
6490
6491 /**
6492 * xmlParseElementChildrenContentDeclPriv:
6493 * @ctxt: an XML parser context
6494 * @inputchk: the input used for the current entity, needed for boundary checks
6495 * @depth: the level of recursion
6496 *
6497 * parse the declaration for a Mixed Element content
6498 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6499 *
6500 *
6501 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6502 *
6503 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6504 *
6505 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6506 *
6507 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6508 *
6509 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6510 * TODO Parameter-entity replacement text must be properly nested
6511 * with parenthesized groups. That is to say, if either of the
6512 * opening or closing parentheses in a choice, seq, or Mixed
6513 * construct is contained in the replacement text for a parameter
6514 * entity, both must be contained in the same replacement text. For
6515 * interoperability, if a parameter-entity reference appears in a
6516 * choice, seq, or Mixed construct, its replacement text should not
6517 * be empty, and neither the first nor last non-blank character of
6518 * the replacement text should be a connector (| or ,).
6519 *
6520 * Returns the tree of xmlElementContentPtr describing the element
6521 * hierarchy.
6522 */
6523 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6524 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6525 int depth) {
6526 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6527 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6528 const xmlChar *elem;
6529 xmlChar type = 0;
6530
6531 if (depth > maxDepth) {
6532 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6533 "xmlParseElementChildrenContentDecl : depth %d too deep, "
6534 "use XML_PARSE_HUGE\n", depth);
6535 return(NULL);
6536 }
6537 SKIP_BLANKS_PE;
6538 GROW;
6539 if (RAW == '(') {
6540 int inputid = ctxt->input->id;
6541
6542 /* Recurse on first child */
6543 NEXT;
6544 SKIP_BLANKS_PE;
6545 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6546 depth + 1);
6547 if (cur == NULL)
6548 return(NULL);
6549 SKIP_BLANKS_PE;
6550 GROW;
6551 } else {
6552 elem = xmlParseName(ctxt);
6553 if (elem == NULL) {
6554 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6555 return(NULL);
6556 }
6557 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6558 if (cur == NULL) {
6559 xmlErrMemory(ctxt);
6560 return(NULL);
6561 }
6562 GROW;
6563 if (RAW == '?') {
6564 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6565 NEXT;
6566 } else if (RAW == '*') {
6567 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6568 NEXT;
6569 } else if (RAW == '+') {
6570 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6571 NEXT;
6572 } else {
6573 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6574 }
6575 GROW;
6576 }
6577 SKIP_BLANKS_PE;
6578 while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6579 /*
6580 * Each loop we parse one separator and one element.
6581 */
6582 if (RAW == ',') {
6583 if (type == 0) type = CUR;
6584
6585 /*
6586 * Detect "Name | Name , Name" error
6587 */
6588 else if (type != CUR) {
6589 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6590 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6591 type);
6592 if ((last != NULL) && (last != ret))
6593 xmlFreeDocElementContent(ctxt->myDoc, last);
6594 if (ret != NULL)
6595 xmlFreeDocElementContent(ctxt->myDoc, ret);
6596 return(NULL);
6597 }
6598 NEXT;
6599
6600 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6601 if (op == NULL) {
6602 xmlErrMemory(ctxt);
6603 if ((last != NULL) && (last != ret))
6604 xmlFreeDocElementContent(ctxt->myDoc, last);
6605 xmlFreeDocElementContent(ctxt->myDoc, ret);
6606 return(NULL);
6607 }
6608 if (last == NULL) {
6609 op->c1 = ret;
6610 if (ret != NULL)
6611 ret->parent = op;
6612 ret = cur = op;
6613 } else {
6614 cur->c2 = op;
6615 if (op != NULL)
6616 op->parent = cur;
6617 op->c1 = last;
6618 if (last != NULL)
6619 last->parent = op;
6620 cur =op;
6621 last = NULL;
6622 }
6623 } else if (RAW == '|') {
6624 if (type == 0) type = CUR;
6625
6626 /*
6627 * Detect "Name , Name | Name" error
6628 */
6629 else if (type != CUR) {
6630 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6631 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6632 type);
6633 if ((last != NULL) && (last != ret))
6634 xmlFreeDocElementContent(ctxt->myDoc, last);
6635 if (ret != NULL)
6636 xmlFreeDocElementContent(ctxt->myDoc, ret);
6637 return(NULL);
6638 }
6639 NEXT;
6640
6641 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6642 if (op == NULL) {
6643 xmlErrMemory(ctxt);
6644 if ((last != NULL) && (last != ret))
6645 xmlFreeDocElementContent(ctxt->myDoc, last);
6646 if (ret != NULL)
6647 xmlFreeDocElementContent(ctxt->myDoc, ret);
6648 return(NULL);
6649 }
6650 if (last == NULL) {
6651 op->c1 = ret;
6652 if (ret != NULL)
6653 ret->parent = op;
6654 ret = cur = op;
6655 } else {
6656 cur->c2 = op;
6657 if (op != NULL)
6658 op->parent = cur;
6659 op->c1 = last;
6660 if (last != NULL)
6661 last->parent = op;
6662 cur =op;
6663 last = NULL;
6664 }
6665 } else {
6666 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6667 if ((last != NULL) && (last != ret))
6668 xmlFreeDocElementContent(ctxt->myDoc, last);
6669 if (ret != NULL)
6670 xmlFreeDocElementContent(ctxt->myDoc, ret);
6671 return(NULL);
6672 }
6673 GROW;
6674 SKIP_BLANKS_PE;
6675 GROW;
6676 if (RAW == '(') {
6677 int inputid = ctxt->input->id;
6678 /* Recurse on second child */
6679 NEXT;
6680 SKIP_BLANKS_PE;
6681 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6682 depth + 1);
6683 if (last == NULL) {
6684 if (ret != NULL)
6685 xmlFreeDocElementContent(ctxt->myDoc, ret);
6686 return(NULL);
6687 }
6688 SKIP_BLANKS_PE;
6689 } else {
6690 elem = xmlParseName(ctxt);
6691 if (elem == NULL) {
6692 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6693 if (ret != NULL)
6694 xmlFreeDocElementContent(ctxt->myDoc, ret);
6695 return(NULL);
6696 }
6697 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6698 if (last == NULL) {
6699 xmlErrMemory(ctxt);
6700 if (ret != NULL)
6701 xmlFreeDocElementContent(ctxt->myDoc, ret);
6702 return(NULL);
6703 }
6704 if (RAW == '?') {
6705 last->ocur = XML_ELEMENT_CONTENT_OPT;
6706 NEXT;
6707 } else if (RAW == '*') {
6708 last->ocur = XML_ELEMENT_CONTENT_MULT;
6709 NEXT;
6710 } else if (RAW == '+') {
6711 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6712 NEXT;
6713 } else {
6714 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6715 }
6716 }
6717 SKIP_BLANKS_PE;
6718 GROW;
6719 }
6720 if ((cur != NULL) && (last != NULL)) {
6721 cur->c2 = last;
6722 if (last != NULL)
6723 last->parent = cur;
6724 }
6725 if (ctxt->input->id != inputchk) {
6726 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6727 "Element content declaration doesn't start and stop in"
6728 " the same entity\n");
6729 }
6730 NEXT;
6731 if (RAW == '?') {
6732 if (ret != NULL) {
6733 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6734 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6735 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6736 else
6737 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6738 }
6739 NEXT;
6740 } else if (RAW == '*') {
6741 if (ret != NULL) {
6742 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6743 cur = ret;
6744 /*
6745 * Some normalization:
6746 * (a | b* | c?)* == (a | b | c)*
6747 */
6748 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6749 if ((cur->c1 != NULL) &&
6750 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6752 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6753 if ((cur->c2 != NULL) &&
6754 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6755 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6756 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6757 cur = cur->c2;
6758 }
6759 }
6760 NEXT;
6761 } else if (RAW == '+') {
6762 if (ret != NULL) {
6763 int found = 0;
6764
6765 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6766 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6767 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6768 else
6769 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6770 /*
6771 * Some normalization:
6772 * (a | b*)+ == (a | b)*
6773 * (a | b?)+ == (a | b)*
6774 */
6775 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6776 if ((cur->c1 != NULL) &&
6777 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6778 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6779 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6780 found = 1;
6781 }
6782 if ((cur->c2 != NULL) &&
6783 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6784 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6785 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6786 found = 1;
6787 }
6788 cur = cur->c2;
6789 }
6790 if (found)
6791 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6792 }
6793 NEXT;
6794 }
6795 return(ret);
6796 }
6797
6798 /**
6799 * xmlParseElementChildrenContentDecl:
6800 * @ctxt: an XML parser context
6801 * @inputchk: the input used for the current entity, needed for boundary checks
6802 *
6803 * DEPRECATED: Internal function, don't use.
6804 *
 * Parse the declaration for an element children content model (not Mixed).
6806 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6807 *
6808 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6809 *
6810 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6811 *
6812 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6813 *
6814 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6815 *
6816 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6817 * TODO Parameter-entity replacement text must be properly nested
6818 * with parenthesized groups. That is to say, if either of the
6819 * opening or closing parentheses in a choice, seq, or Mixed
6820 * construct is contained in the replacement text for a parameter
6821 * entity, both must be contained in the same replacement text. For
6822 * interoperability, if a parameter-entity reference appears in a
6823 * choice, seq, or Mixed construct, its replacement text should not
6824 * be empty, and neither the first nor last non-blank character of
6825 * the replacement text should be a connector (| or ,).
6826 *
6827 * Returns the tree of xmlElementContentPtr describing the element
6828 * hierarchy.
6829 */
6830 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6831 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6832 /* stub left for API/ABI compat */
6833 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6834 }
6835
6836 /**
6837 * xmlParseElementContentDecl:
6838 * @ctxt: an XML parser context
6839 * @name: the name of the element being defined.
6840 * @result: the Element Content pointer will be stored here if any
6841 *
6842 * DEPRECATED: Internal function, don't use.
6843 *
6844 * parse the declaration for an Element content either Mixed or Children,
6845 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6846 *
6847 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6848 *
6849 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6850 */
6851
6852 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6853 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6854 xmlElementContentPtr *result) {
6855
6856 xmlElementContentPtr tree = NULL;
6857 int inputid = ctxt->input->id;
6858 int res;
6859
6860 *result = NULL;
6861
6862 if (RAW != '(') {
6863 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6864 "xmlParseElementContentDecl : %s '(' expected\n", name);
6865 return(-1);
6866 }
6867 NEXT;
6868 GROW;
6869 SKIP_BLANKS_PE;
6870 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6871 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6872 res = XML_ELEMENT_TYPE_MIXED;
6873 } else {
6874 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6875 res = XML_ELEMENT_TYPE_ELEMENT;
6876 }
6877 SKIP_BLANKS_PE;
6878 *result = tree;
6879 return(res);
6880 }
6881
6882 /**
6883 * xmlParseElementDecl:
6884 * @ctxt: an XML parser context
6885 *
6886 * DEPRECATED: Internal function, don't use.
6887 *
6888 * Parse an element declaration. Always consumes '<!'.
6889 *
6890 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6891 *
6892 * [ VC: Unique Element Type Declaration ]
6893 * No element type may be declared more than once
6894 *
6895 * Returns the type of the element, or -1 in case of error
6896 */
6897 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6898 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6899 const xmlChar *name;
6900 int ret = -1;
6901 xmlElementContentPtr content = NULL;
6902
6903 if ((CUR != '<') || (NXT(1) != '!'))
6904 return(ret);
6905 SKIP(2);
6906
6907 /* GROW; done in the caller */
6908 if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6909 int inputid = ctxt->input->id;
6910
6911 SKIP(7);
6912 if (SKIP_BLANKS_PE == 0) {
6913 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6914 "Space required after 'ELEMENT'\n");
6915 return(-1);
6916 }
6917 name = xmlParseName(ctxt);
6918 if (name == NULL) {
6919 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6920 "xmlParseElementDecl: no name for Element\n");
6921 return(-1);
6922 }
6923 if (SKIP_BLANKS_PE == 0) {
6924 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6925 "Space required after the element name\n");
6926 }
6927 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6928 SKIP(5);
6929 /*
6930 * Element must always be empty.
6931 */
6932 ret = XML_ELEMENT_TYPE_EMPTY;
6933 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6934 (NXT(2) == 'Y')) {
6935 SKIP(3);
6936 /*
6937 * Element is a generic container.
6938 */
6939 ret = XML_ELEMENT_TYPE_ANY;
6940 } else if (RAW == '(') {
6941 ret = xmlParseElementContentDecl(ctxt, name, &content);
6942 } else {
6943 /*
6944 * [ WFC: PEs in Internal Subset ] error handling.
6945 */
6946 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6947 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6948 return(-1);
6949 }
6950
6951 SKIP_BLANKS_PE;
6952
6953 if (RAW != '>') {
6954 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6955 if (content != NULL) {
6956 xmlFreeDocElementContent(ctxt->myDoc, content);
6957 }
6958 } else {
6959 if (inputid != ctxt->input->id) {
6960 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6961 "Element declaration doesn't start and stop in"
6962 " the same entity\n");
6963 }
6964
6965 NEXT;
6966 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6967 (ctxt->sax->elementDecl != NULL)) {
6968 if (content != NULL)
6969 content->parent = NULL;
6970 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6971 content);
6972 if ((content != NULL) && (content->parent == NULL)) {
6973 /*
6974 * this is a trick: if xmlAddElementDecl is called,
6975 * instead of copying the full tree it is plugged directly
6976 * if called from the parser. Avoid duplicating the
6977 * interfaces or change the API/ABI
6978 */
6979 xmlFreeDocElementContent(ctxt->myDoc, content);
6980 }
6981 } else if (content != NULL) {
6982 xmlFreeDocElementContent(ctxt->myDoc, content);
6983 }
6984 }
6985 }
6986 return(ret);
6987 }
6988
6989 /**
 * xmlParseConditionalSections:
6991 * @ctxt: an XML parser context
6992 *
6993 * Parse a conditional section. Always consumes '<!['.
6994 *
6995 * [61] conditionalSect ::= includeSect | ignoreSect
6996 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6997 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6998 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6999 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
7000 */
7001
7002 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)7003 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
7004 int *inputIds = NULL;
7005 size_t inputIdsSize = 0;
7006 size_t depth = 0;
7007
7008 while (PARSER_STOPPED(ctxt) == 0) {
7009 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7010 int id = ctxt->input->id;
7011
7012 SKIP(3);
7013 SKIP_BLANKS_PE;
7014
7015 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7016 SKIP(7);
7017 SKIP_BLANKS_PE;
7018 if (RAW != '[') {
7019 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7020 xmlHaltParser(ctxt);
7021 goto error;
7022 }
7023 if (ctxt->input->id != id) {
7024 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7025 "All markup of the conditional section is"
7026 " not in the same entity\n");
7027 }
7028 NEXT;
7029
7030 if (inputIdsSize <= depth) {
7031 int *tmp;
7032
7033 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7034 tmp = (int *) xmlRealloc(inputIds,
7035 inputIdsSize * sizeof(int));
7036 if (tmp == NULL) {
7037 xmlErrMemory(ctxt);
7038 goto error;
7039 }
7040 inputIds = tmp;
7041 }
7042 inputIds[depth] = id;
7043 depth++;
7044 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7045 size_t ignoreDepth = 0;
7046
7047 SKIP(6);
7048 SKIP_BLANKS_PE;
7049 if (RAW != '[') {
7050 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7051 xmlHaltParser(ctxt);
7052 goto error;
7053 }
7054 if (ctxt->input->id != id) {
7055 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7056 "All markup of the conditional section is"
7057 " not in the same entity\n");
7058 }
7059 NEXT;
7060
7061 while (PARSER_STOPPED(ctxt) == 0) {
7062 if (RAW == 0) {
7063 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7064 goto error;
7065 }
7066 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7067 SKIP(3);
7068 ignoreDepth++;
7069 /* Check for integer overflow */
7070 if (ignoreDepth == 0) {
7071 xmlErrMemory(ctxt);
7072 goto error;
7073 }
7074 } else if ((RAW == ']') && (NXT(1) == ']') &&
7075 (NXT(2) == '>')) {
7076 SKIP(3);
7077 if (ignoreDepth == 0)
7078 break;
7079 ignoreDepth--;
7080 } else {
7081 NEXT;
7082 }
7083 }
7084
7085 if (ctxt->input->id != id) {
7086 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7087 "All markup of the conditional section is"
7088 " not in the same entity\n");
7089 }
7090 } else {
7091 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7092 xmlHaltParser(ctxt);
7093 goto error;
7094 }
7095 } else if ((depth > 0) &&
7096 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7097 depth--;
7098 if (ctxt->input->id != inputIds[depth]) {
7099 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7100 "All markup of the conditional section is not"
7101 " in the same entity\n");
7102 }
7103 SKIP(3);
7104 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7105 xmlParseMarkupDecl(ctxt);
7106 } else {
7107 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7108 xmlHaltParser(ctxt);
7109 goto error;
7110 }
7111
7112 if (depth == 0)
7113 break;
7114
7115 SKIP_BLANKS_PE;
7116 SHRINK;
7117 GROW;
7118 }
7119
7120 error:
7121 xmlFree(inputIds);
7122 }
7123
7124 /**
7125 * xmlParseMarkupDecl:
7126 * @ctxt: an XML parser context
7127 *
7128 * DEPRECATED: Internal function, don't use.
7129 *
7130 * Parse markup declarations. Always consumes '<!' or '<?'.
7131 *
7132 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7133 * NotationDecl | PI | Comment
7134 *
7135 * [ VC: Proper Declaration/PE Nesting ]
7136 * Parameter-entity replacement text must be properly nested with
7137 * markup declarations. That is to say, if either the first character
7138 * or the last character of a markup declaration (markupdecl above) is
7139 * contained in the replacement text for a parameter-entity reference,
7140 * both must be contained in the same replacement text.
7141 *
7142 * [ WFC: PEs in Internal Subset ]
7143 * In the internal DTD subset, parameter-entity references can occur
7144 * only where markup declarations can occur, not within markup declarations.
7145 * (This does not apply to references that occur in external parameter
7146 * entities or to the external subset.)
7147 */
7148 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)7149 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7150 GROW;
7151 if (CUR == '<') {
7152 if (NXT(1) == '!') {
7153 switch (NXT(2)) {
7154 case 'E':
7155 if (NXT(3) == 'L')
7156 xmlParseElementDecl(ctxt);
7157 else if (NXT(3) == 'N')
7158 xmlParseEntityDecl(ctxt);
7159 else
7160 SKIP(2);
7161 break;
7162 case 'A':
7163 xmlParseAttributeListDecl(ctxt);
7164 break;
7165 case 'N':
7166 xmlParseNotationDecl(ctxt);
7167 break;
7168 case '-':
7169 xmlParseComment(ctxt);
7170 break;
7171 default:
7172 /* there is an error but it will be detected later */
7173 SKIP(2);
7174 break;
7175 }
7176 } else if (NXT(1) == '?') {
7177 xmlParsePI(ctxt);
7178 }
7179 }
7180 }
7181
7182 /**
7183 * xmlParseTextDecl:
7184 * @ctxt: an XML parser context
7185 *
7186 * DEPRECATED: Internal function, don't use.
7187 *
7188 * parse an XML declaration header for external entities
7189 *
7190 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7191 */
7192
7193 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7194 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7195 xmlChar *version;
7196
7197 /*
7198 * We know that '<?xml' is here.
7199 */
7200 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7201 SKIP(5);
7202 } else {
7203 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7204 return;
7205 }
7206
7207 if (SKIP_BLANKS == 0) {
7208 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7209 "Space needed after '<?xml'\n");
7210 }
7211
7212 /*
7213 * We may have the VersionInfo here.
7214 */
7215 version = xmlParseVersionInfo(ctxt);
7216 if (version == NULL) {
7217 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7218 if (version == NULL) {
7219 xmlErrMemory(ctxt);
7220 return;
7221 }
7222 } else {
7223 if (SKIP_BLANKS == 0) {
7224 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7225 "Space needed here\n");
7226 }
7227 }
7228 ctxt->input->version = version;
7229
7230 /*
7231 * We must have the encoding declaration
7232 */
7233 xmlParseEncodingDecl(ctxt);
7234
7235 SKIP_BLANKS;
7236 if ((RAW == '?') && (NXT(1) == '>')) {
7237 SKIP(2);
7238 } else if (RAW == '>') {
7239 /* Deprecated old WD ... */
7240 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7241 NEXT;
7242 } else {
7243 int c;
7244
7245 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7246 while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7247 NEXT;
7248 if (c == '>')
7249 break;
7250 }
7251 }
7252 }
7253
7254 /**
7255 * xmlParseExternalSubset:
7256 * @ctxt: an XML parser context
7257 * @ExternalID: the external identifier
7258 * @SystemID: the system identifier (or URL)
7259 *
7260 * parse Markup declarations from an external subset
7261 *
7262 * [30] extSubset ::= textDecl? extSubsetDecl
7263 *
7264 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7265 */
7266 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7267 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7268 const xmlChar *SystemID) {
7269 int oldInputNr;
7270
7271 xmlCtxtInitializeLate(ctxt);
7272
7273 xmlDetectEncoding(ctxt);
7274
7275 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7276 xmlParseTextDecl(ctxt);
7277 }
7278 if (ctxt->myDoc == NULL) {
7279 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7280 if (ctxt->myDoc == NULL) {
7281 xmlErrMemory(ctxt);
7282 return;
7283 }
7284 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7285 }
7286 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7287 (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7288 xmlErrMemory(ctxt);
7289 }
7290
7291 ctxt->inSubset = 2;
7292 oldInputNr = ctxt->inputNr;
7293
7294 SKIP_BLANKS_PE;
7295 while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7296 (!PARSER_STOPPED(ctxt))) {
7297 GROW;
7298 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7299 xmlParseConditionalSections(ctxt);
7300 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7301 xmlParseMarkupDecl(ctxt);
7302 } else {
7303 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7304 xmlHaltParser(ctxt);
7305 return;
7306 }
7307 SKIP_BLANKS_PE;
7308 SHRINK;
7309 }
7310
7311 while (ctxt->inputNr > oldInputNr)
7312 xmlPopPE(ctxt);
7313
7314 if (RAW != 0) {
7315 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7316 }
7317 }
7318
7319 /**
7320 * xmlParseReference:
7321 * @ctxt: an XML parser context
7322 *
7323 * DEPRECATED: Internal function, don't use.
7324 *
7325 * parse and handle entity references in content, depending on the SAX
7326 * interface, this may end-up in a call to character() if this is a
7327 * CharRef, a predefined entity, if there is no reference() callback.
7328 * or if the parser was asked to switch to that mode.
7329 *
7330 * Always consumes '&'.
7331 *
7332 * [67] Reference ::= EntityRef | CharRef
7333 */
7334 void
xmlParseReference(xmlParserCtxtPtr ctxt)7335 xmlParseReference(xmlParserCtxtPtr ctxt) {
7336 xmlEntityPtr ent = NULL;
7337 const xmlChar *name;
7338 xmlChar *val;
7339
7340 if (RAW != '&')
7341 return;
7342
7343 /*
7344 * Simple case of a CharRef
7345 */
7346 if (NXT(1) == '#') {
7347 int i = 0;
7348 xmlChar out[16];
7349 int value = xmlParseCharRef(ctxt);
7350
7351 if (value == 0)
7352 return;
7353
7354 /*
7355 * Just encode the value in UTF-8
7356 */
7357 COPY_BUF(out, i, value);
7358 out[i] = 0;
7359 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7360 (!ctxt->disableSAX))
7361 ctxt->sax->characters(ctxt->userData, out, i);
7362 return;
7363 }
7364
7365 /*
7366 * We are seeing an entity reference
7367 */
7368 name = xmlParseEntityRefInternal(ctxt);
7369 if (name != NULL)
7370 ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7371 if (ent == NULL) return;
7372 if (!ctxt->wellFormed)
7373 return;
7374
7375 /* special case of predefined entities */
7376 if ((ent->name == NULL) ||
7377 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7378 val = ent->content;
7379 if (val == NULL) return;
7380 /*
7381 * inline the entity.
7382 */
7383 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7384 (!ctxt->disableSAX))
7385 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7386 return;
7387 }
7388
7389 /*
7390 * The first reference to the entity trigger a parsing phase
7391 * where the ent->children is filled with the result from
7392 * the parsing.
7393 * Note: external parsed entities will not be loaded, it is not
7394 * required for a non-validating parser, unless the parsing option
7395 * of validating, or substituting entities were given. Doing so is
7396 * far more secure as the parser will only process data coming from
7397 * the document entity by default.
7398 *
7399 * FIXME: This doesn't work correctly since entities can be
7400 * expanded with different namespace declarations in scope.
7401 * For example:
7402 *
7403 * <!DOCTYPE doc [
7404 * <!ENTITY ent "<ns:elem/>">
7405 * ]>
7406 * <doc>
7407 * <decl1 xmlns:ns="urn:ns1">
7408 * &ent;
7409 * </decl1>
7410 * <decl2 xmlns:ns="urn:ns2">
7411 * &ent;
7412 * </decl2>
7413 * </doc>
7414 *
7415 * Proposed fix:
7416 *
7417 * - Ignore current namespace declarations when parsing the
7418 * entity. If a prefix can't be resolved, don't report an error
7419 * but mark it as unresolved.
7420 * - Try to resolve these prefixes when expanding the entity.
7421 * This will require a specialized version of xmlStaticCopyNode
7422 * which can also make use of the namespace hash table to avoid
7423 * quadratic behavior.
7424 *
7425 * Alternatively, we could simply reparse the entity on each
7426 * expansion like we already do with custom SAX callbacks.
7427 * External entity content should be cached in this case.
7428 */
7429 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7430 (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7431 ((ctxt->replaceEntities) ||
7432 (ctxt->validate)))) {
7433 if ((ent->flags & XML_ENT_PARSED) == 0) {
7434 xmlCtxtParseEntity(ctxt, ent);
7435 } else if (ent->children == NULL) {
7436 /*
7437 * Probably running in SAX mode and the callbacks don't
7438 * build the entity content. Parse the entity again.
7439 *
7440 * This will also be triggered in normal tree builder mode
7441 * if an entity happens to be empty, causing unnecessary
7442 * reloads. It's hard to come up with a reliable check in
7443 * which mode we're running.
7444 */
7445 xmlCtxtParseEntity(ctxt, ent);
7446 }
7447 }
7448
7449 /*
7450 * We also check for amplification if entities aren't substituted.
7451 * They might be expanded later.
7452 */
7453 if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7454 return;
7455
7456 if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7457 return;
7458
7459 if (ctxt->replaceEntities == 0) {
7460 /*
7461 * Create a reference
7462 */
7463 if (ctxt->sax->reference != NULL)
7464 ctxt->sax->reference(ctxt->userData, ent->name);
7465 } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7466 xmlNodePtr copy, cur;
7467
7468 /*
7469 * Seems we are generating the DOM content, copy the tree
7470 */
7471 cur = ent->children;
7472
7473 /*
7474 * Handle first text node with SAX to coalesce text efficiently
7475 */
7476 if ((cur->type == XML_TEXT_NODE) ||
7477 (cur->type == XML_CDATA_SECTION_NODE)) {
7478 int len = xmlStrlen(cur->content);
7479
7480 if ((cur->type == XML_TEXT_NODE) ||
7481 (ctxt->sax->cdataBlock == NULL)) {
7482 if (ctxt->sax->characters != NULL)
7483 ctxt->sax->characters(ctxt, cur->content, len);
7484 } else {
7485 if (ctxt->sax->cdataBlock != NULL)
7486 ctxt->sax->cdataBlock(ctxt, cur->content, len);
7487 }
7488
7489 cur = cur->next;
7490 }
7491
7492 while (cur != NULL) {
7493 xmlNodePtr last;
7494
7495 /*
7496 * Handle last text node with SAX to coalesce text efficiently
7497 */
7498 if ((cur->next == NULL) &&
7499 ((cur->type == XML_TEXT_NODE) ||
7500 (cur->type == XML_CDATA_SECTION_NODE))) {
7501 int len = xmlStrlen(cur->content);
7502
7503 if ((cur->type == XML_TEXT_NODE) ||
7504 (ctxt->sax->cdataBlock == NULL)) {
7505 if (ctxt->sax->characters != NULL)
7506 ctxt->sax->characters(ctxt, cur->content, len);
7507 } else {
7508 if (ctxt->sax->cdataBlock != NULL)
7509 ctxt->sax->cdataBlock(ctxt, cur->content, len);
7510 }
7511
7512 break;
7513 }
7514
7515 /*
7516 * Reset coalesce buffer stats only for non-text nodes.
7517 */
7518 ctxt->nodemem = 0;
7519 ctxt->nodelen = 0;
7520
7521 copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7522
7523 if (copy == NULL) {
7524 xmlErrMemory(ctxt);
7525 break;
7526 }
7527
7528 if (ctxt->parseMode == XML_PARSE_READER) {
7529 /* Needed for reader */
7530 copy->extra = cur->extra;
7531 /* Maybe needed for reader */
7532 copy->_private = cur->_private;
7533 }
7534
7535 copy->parent = ctxt->node;
7536 last = ctxt->node->last;
7537 if (last == NULL) {
7538 ctxt->node->children = copy;
7539 } else {
7540 last->next = copy;
7541 copy->prev = last;
7542 }
7543 ctxt->node->last = copy;
7544
7545 cur = cur->next;
7546 }
7547 }
7548 }
7549
7550 static xmlEntityPtr
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt,const xmlChar * name,int inAttr)7551 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7552 xmlEntityPtr ent;
7553
7554 /*
7555 * Predefined entities override any extra definition
7556 */
7557 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7558 ent = xmlGetPredefinedEntity(name);
7559 if (ent != NULL)
7560 return(ent);
7561 }
7562
7563 /*
7564 * Ask first SAX for entity resolution, otherwise try the
7565 * entities which may have stored in the parser context.
7566 */
7567 if (ctxt->sax != NULL) {
7568 if (ctxt->sax->getEntity != NULL)
7569 ent = ctxt->sax->getEntity(ctxt->userData, name);
7570 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7571 (ctxt->options & XML_PARSE_OLDSAX))
7572 ent = xmlGetPredefinedEntity(name);
7573 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7574 (ctxt->userData==ctxt)) {
7575 ent = xmlSAX2GetEntity(ctxt, name);
7576 }
7577 }
7578 /*
7579 * [ WFC: Entity Declared ]
7580 * In a document without any DTD, a document with only an
7581 * internal DTD subset which contains no parameter entity
7582 * references, or a document with "standalone='yes'", the
7583 * Name given in the entity reference must match that in an
7584 * entity declaration, except that well-formed documents
7585 * need not declare any of the following entities: amp, lt,
7586 * gt, apos, quot.
7587 * The declaration of a parameter entity must precede any
7588 * reference to it.
7589 * Similarly, the declaration of a general entity must
7590 * precede any reference to it which appears in a default
7591 * value in an attribute-list declaration. Note that if
7592 * entities are declared in the external subset or in
7593 * external parameter entities, a non-validating processor
7594 * is not obligated to read and process their declarations;
7595 * for such documents, the rule that an entity must be
7596 * declared is a well-formedness constraint only if
7597 * standalone='yes'.
7598 */
7599 if (ent == NULL) {
7600 if ((ctxt->standalone == 1) ||
7601 ((ctxt->hasExternalSubset == 0) &&
7602 (ctxt->hasPErefs == 0))) {
7603 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604 "Entity '%s' not defined\n", name);
7605 } else {
7606 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7607 "Entity '%s' not defined\n", name);
7608 if ((ctxt->inSubset == 0) &&
7609 (ctxt->sax != NULL) &&
7610 (ctxt->disableSAX == 0) &&
7611 (ctxt->sax->reference != NULL)) {
7612 ctxt->sax->reference(ctxt->userData, name);
7613 }
7614 }
7615 ctxt->valid = 0;
7616 }
7617
7618 /*
7619 * [ WFC: Parsed Entity ]
7620 * An entity reference must not contain the name of an
7621 * unparsed entity
7622 */
7623 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7624 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7625 "Entity reference to unparsed entity %s\n", name);
7626 ent = NULL;
7627 }
7628
7629 /*
7630 * [ WFC: No External Entity References ]
7631 * Attribute values cannot contain direct or indirect
7632 * entity references to external entities.
7633 */
7634 else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7635 if (inAttr) {
7636 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7637 "Attribute references external entity '%s'\n", name);
7638 ent = NULL;
7639 }
7640 }
7641
7642 return(ent);
7643 }
7644
7645 /**
7646 * xmlParseEntityRefInternal:
7647 * @ctxt: an XML parser context
7648 * @inAttr: whether we are in an attribute value
7649 *
7650 * Parse an entity reference. Always consumes '&'.
7651 *
7652 * [68] EntityRef ::= '&' Name ';'
7653 *
7654 * Returns the name, or NULL in case of error.
7655 */
7656 static const xmlChar *
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt)7657 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7658 const xmlChar *name;
7659
7660 GROW;
7661
7662 if (RAW != '&')
7663 return(NULL);
7664 NEXT;
7665 name = xmlParseName(ctxt);
7666 if (name == NULL) {
7667 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7668 "xmlParseEntityRef: no name\n");
7669 return(NULL);
7670 }
7671 if (RAW != ';') {
7672 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7673 return(NULL);
7674 }
7675 NEXT;
7676
7677 return(name);
7678 }
7679
7680 /**
7681 * xmlParseEntityRef:
7682 * @ctxt: an XML parser context
7683 *
7684 * DEPRECATED: Internal function, don't use.
7685 *
7686 * Returns the xmlEntityPtr if found, or NULL otherwise.
7687 */
7688 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7689 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7690 const xmlChar *name;
7691
7692 if (ctxt == NULL)
7693 return(NULL);
7694
7695 name = xmlParseEntityRefInternal(ctxt);
7696 if (name == NULL)
7697 return(NULL);
7698
7699 return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7700 }
7701
7702 /**
7703 * xmlParseStringEntityRef:
7704 * @ctxt: an XML parser context
7705 * @str: a pointer to an index in the string
7706 *
7707 * parse ENTITY references declarations, but this version parses it from
7708 * a string value.
7709 *
7710 * [68] EntityRef ::= '&' Name ';'
7711 *
7712 * [ WFC: Entity Declared ]
7713 * In a document without any DTD, a document with only an internal DTD
7714 * subset which contains no parameter entity references, or a document
7715 * with "standalone='yes'", the Name given in the entity reference
7716 * must match that in an entity declaration, except that well-formed
7717 * documents need not declare any of the following entities: amp, lt,
7718 * gt, apos, quot. The declaration of a parameter entity must precede
7719 * any reference to it. Similarly, the declaration of a general entity
7720 * must precede any reference to it which appears in a default value in an
7721 * attribute-list declaration. Note that if entities are declared in the
7722 * external subset or in external parameter entities, a non-validating
7723 * processor is not obligated to read and process their declarations;
7724 * for such documents, the rule that an entity must be declared is a
7725 * well-formedness constraint only if standalone='yes'.
7726 *
7727 * [ WFC: Parsed Entity ]
7728 * An entity reference must not contain the name of an unparsed entity
7729 *
 * Returns the name of the referenced entity as an allocated string, or
 * NULL in case of error. The str pointer is updated to the current
 * location in the string.
7732 */
7733 static xmlChar *
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7734 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7735 xmlChar *name;
7736 const xmlChar *ptr;
7737 xmlChar cur;
7738
7739 if ((str == NULL) || (*str == NULL))
7740 return(NULL);
7741 ptr = *str;
7742 cur = *ptr;
7743 if (cur != '&')
7744 return(NULL);
7745
7746 ptr++;
7747 name = xmlParseStringName(ctxt, &ptr);
7748 if (name == NULL) {
7749 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7750 "xmlParseStringEntityRef: no name\n");
7751 *str = ptr;
7752 return(NULL);
7753 }
7754 if (*ptr != ';') {
7755 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7756 xmlFree(name);
7757 *str = ptr;
7758 return(NULL);
7759 }
7760 ptr++;
7761
7762 *str = ptr;
7763 return(name);
7764 }
7765
7766 /**
7767 * xmlParsePEReference:
7768 * @ctxt: an XML parser context
7769 *
7770 * DEPRECATED: Internal function, don't use.
7771 *
7772 * Parse a parameter entity reference. Always consumes '%'.
7773 *
 * The entity content is handled directly by pushing its content as
 * a new input stream.
7776 *
7777 * [69] PEReference ::= '%' Name ';'
7778 *
7779 * [ WFC: No Recursion ]
7780 * A parsed entity must not contain a recursive
7781 * reference to itself, either directly or indirectly.
7782 *
7783 * [ WFC: Entity Declared ]
7784 * In a document without any DTD, a document with only an internal DTD
7785 * subset which contains no parameter entity references, or a document
7786 * with "standalone='yes'", ... ... The declaration of a parameter
7787 * entity must precede any reference to it...
7788 *
7789 * [ VC: Entity Declared ]
7790 * In a document with an external subset or external parameter entities
7791 * with "standalone='no'", ... ... The declaration of a parameter entity
7792 * must precede any reference to it...
7793 *
7794 * [ WFC: In DTD ]
7795 * Parameter-entity references may only appear in the DTD.
7796 * NOTE: misleading but this is handled.
7797 */
7798 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7799 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7800 {
7801 const xmlChar *name;
7802 xmlEntityPtr entity = NULL;
7803 xmlParserInputPtr input;
7804
7805 if (RAW != '%')
7806 return;
7807 NEXT;
7808 name = xmlParseName(ctxt);
7809 if (name == NULL) {
7810 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7811 return;
7812 }
7813 if (RAW != ';') {
7814 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7815 return;
7816 }
7817
7818 NEXT;
7819
7820 /*
7821 * Request the entity from SAX
7822 */
7823 if ((ctxt->sax != NULL) &&
7824 (ctxt->sax->getParameterEntity != NULL))
7825 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7826 if (entity == NULL) {
7827 /*
7828 * [ WFC: Entity Declared ]
7829 * In a document without any DTD, a document with only an
7830 * internal DTD subset which contains no parameter entity
7831 * references, or a document with "standalone='yes'", ...
7832 * ... The declaration of a parameter entity must precede
7833 * any reference to it...
7834 */
7835 if ((ctxt->standalone == 1) ||
7836 ((ctxt->hasExternalSubset == 0) &&
7837 (ctxt->hasPErefs == 0))) {
7838 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7839 "PEReference: %%%s; not found\n",
7840 name);
7841 } else {
7842 /*
7843 * [ VC: Entity Declared ]
7844 * In a document with an external subset or external
7845 * parameter entities with "standalone='no'", ...
7846 * ... The declaration of a parameter entity must
7847 * precede any reference to it...
7848 */
7849 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7850 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7851 "PEReference: %%%s; not found\n",
7852 name, NULL);
7853 } else
7854 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7855 "PEReference: %%%s; not found\n",
7856 name, NULL);
7857 ctxt->valid = 0;
7858 }
7859 } else {
7860 /*
7861 * Internal checking in case the entity quest barfed
7862 */
7863 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7864 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7865 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7866 "Internal: %%%s; is not a parameter entity\n",
7867 name, NULL);
7868 } else {
7869 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7870 ((ctxt->options & XML_PARSE_NO_XXE) ||
7871 ((ctxt->loadsubset == 0) &&
7872 (ctxt->replaceEntities == 0) &&
7873 (ctxt->validate == 0))))
7874 return;
7875
7876 if (entity->flags & XML_ENT_EXPANDING) {
7877 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7878 xmlHaltParser(ctxt);
7879 return;
7880 }
7881
7882 input = xmlNewEntityInputStream(ctxt, entity);
7883 if (xmlPushInput(ctxt, input) < 0) {
7884 xmlFreeInputStream(input);
7885 return;
7886 }
7887
7888 entity->flags |= XML_ENT_EXPANDING;
7889
7890 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7891 xmlDetectEncoding(ctxt);
7892
7893 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7894 (IS_BLANK_CH(NXT(5)))) {
7895 xmlParseTextDecl(ctxt);
7896 }
7897 }
7898 }
7899 }
7900 ctxt->hasPErefs = 1;
7901 }
7902
7903 /**
7904 * xmlLoadEntityContent:
7905 * @ctxt: an XML parser context
7906 * @entity: an unloaded system entity
7907 *
7908 * Load the original content of the given system entity from the
7909 * ExternalID/SystemID given. This is to be used for Included in Literal
7910 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7911 *
7912 * Returns 0 in case of success and -1 in case of failure
7913 */
7914 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7915 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7916 xmlParserInputPtr oldinput, input = NULL;
7917 xmlParserInputPtr *oldinputTab;
7918 const xmlChar *oldencoding;
7919 xmlChar *content = NULL;
7920 size_t length, i;
7921 int oldinputNr, oldinputMax;
7922 int ret = -1;
7923 int res;
7924
7925 if ((ctxt == NULL) || (entity == NULL) ||
7926 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7927 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7928 (entity->content != NULL)) {
7929 xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7930 "xmlLoadEntityContent parameter error");
7931 return(-1);
7932 }
7933
7934 input = xmlLoadExternalEntity((char *) entity->URI,
7935 (char *) entity->ExternalID, ctxt);
7936 if (input == NULL)
7937 return(-1);
7938
7939 oldinput = ctxt->input;
7940 oldinputNr = ctxt->inputNr;
7941 oldinputMax = ctxt->inputMax;
7942 oldinputTab = ctxt->inputTab;
7943 oldencoding = ctxt->encoding;
7944
7945 ctxt->input = NULL;
7946 ctxt->inputNr = 0;
7947 ctxt->inputMax = 1;
7948 ctxt->encoding = NULL;
7949 ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7950 if (ctxt->inputTab == NULL) {
7951 xmlErrMemory(ctxt);
7952 xmlFreeInputStream(input);
7953 goto error;
7954 }
7955
7956 xmlBufResetInput(input->buf->buffer, input);
7957
7958 inputPush(ctxt, input);
7959
7960 xmlDetectEncoding(ctxt);
7961
7962 /*
7963 * Parse a possible text declaration first
7964 */
7965 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7966 xmlParseTextDecl(ctxt);
7967 /*
7968 * An XML-1.0 document can't reference an entity not XML-1.0
7969 */
7970 if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7971 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7972 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7973 "Version mismatch between document and entity\n");
7974 }
7975 }
7976
7977 length = input->cur - input->base;
7978 xmlBufShrink(input->buf->buffer, length);
7979 xmlSaturatedAdd(&ctxt->sizeentities, length);
7980
7981 while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7982 ;
7983
7984 xmlBufResetInput(input->buf->buffer, input);
7985
7986 if (res < 0) {
7987 xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7988 goto error;
7989 }
7990
7991 length = xmlBufUse(input->buf->buffer);
7992 content = xmlBufDetach(input->buf->buffer);
7993
7994 if (length > INT_MAX) {
7995 xmlErrMemory(ctxt);
7996 goto error;
7997 }
7998
7999 for (i = 0; i < length; ) {
8000 int clen = length - i;
8001 int c = xmlGetUTF8Char(content + i, &clen);
8002
8003 if ((c < 0) || (!IS_CHAR(c))) {
8004 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8005 "xmlLoadEntityContent: invalid char value %d\n",
8006 content[i]);
8007 goto error;
8008 }
8009 i += clen;
8010 }
8011
8012 xmlSaturatedAdd(&ctxt->sizeentities, length);
8013 entity->content = content;
8014 entity->length = length;
8015 content = NULL;
8016 ret = 0;
8017
8018 error:
8019 while (ctxt->inputNr > 0)
8020 xmlFreeInputStream(inputPop(ctxt));
8021 xmlFree(ctxt->inputTab);
8022 xmlFree((xmlChar *) ctxt->encoding);
8023
8024 ctxt->input = oldinput;
8025 ctxt->inputNr = oldinputNr;
8026 ctxt->inputMax = oldinputMax;
8027 ctxt->inputTab = oldinputTab;
8028 ctxt->encoding = oldencoding;
8029
8030 xmlFree(content);
8031
8032 return(ret);
8033 }
8034
8035 /**
8036 * xmlParseStringPEReference:
8037 * @ctxt: an XML parser context
8038 * @str: a pointer to an index in the string
8039 *
8040 * parse PEReference declarations
8041 *
8042 * [69] PEReference ::= '%' Name ';'
8043 *
8044 * [ WFC: No Recursion ]
8045 * A parsed entity must not contain a recursive
8046 * reference to itself, either directly or indirectly.
8047 *
8048 * [ WFC: Entity Declared ]
8049 * In a document without any DTD, a document with only an internal DTD
8050 * subset which contains no parameter entity references, or a document
8051 * with "standalone='yes'", ... ... The declaration of a parameter
8052 * entity must precede any reference to it...
8053 *
8054 * [ VC: Entity Declared ]
8055 * In a document with an external subset or external parameter entities
8056 * with "standalone='no'", ... ... The declaration of a parameter entity
8057 * must precede any reference to it...
8058 *
8059 * [ WFC: In DTD ]
8060 * Parameter-entity references may only appear in the DTD.
8061 * NOTE: misleading but this is handled.
8062 *
8063 * Returns the string of the entity content.
8064 * str is updated to the current value of the index
8065 */
8066 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8067 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8068 const xmlChar *ptr;
8069 xmlChar cur;
8070 xmlChar *name;
8071 xmlEntityPtr entity = NULL;
8072
8073 if ((str == NULL) || (*str == NULL)) return(NULL);
8074 ptr = *str;
8075 cur = *ptr;
8076 if (cur != '%')
8077 return(NULL);
8078 ptr++;
8079 name = xmlParseStringName(ctxt, &ptr);
8080 if (name == NULL) {
8081 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8082 "xmlParseStringPEReference: no name\n");
8083 *str = ptr;
8084 return(NULL);
8085 }
8086 cur = *ptr;
8087 if (cur != ';') {
8088 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8089 xmlFree(name);
8090 *str = ptr;
8091 return(NULL);
8092 }
8093 ptr++;
8094
8095 /*
8096 * Request the entity from SAX
8097 */
8098 if ((ctxt->sax != NULL) &&
8099 (ctxt->sax->getParameterEntity != NULL))
8100 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8101 if (entity == NULL) {
8102 /*
8103 * [ WFC: Entity Declared ]
8104 * In a document without any DTD, a document with only an
8105 * internal DTD subset which contains no parameter entity
8106 * references, or a document with "standalone='yes'", ...
8107 * ... The declaration of a parameter entity must precede
8108 * any reference to it...
8109 */
8110 if ((ctxt->standalone == 1) ||
8111 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8112 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8113 "PEReference: %%%s; not found\n", name);
8114 } else {
8115 /*
8116 * [ VC: Entity Declared ]
8117 * In a document with an external subset or external
8118 * parameter entities with "standalone='no'", ...
8119 * ... The declaration of a parameter entity must
8120 * precede any reference to it...
8121 */
8122 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8123 "PEReference: %%%s; not found\n",
8124 name, NULL);
8125 ctxt->valid = 0;
8126 }
8127 } else {
8128 /*
8129 * Internal checking in case the entity quest barfed
8130 */
8131 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8132 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8133 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8134 "%%%s; is not a parameter entity\n",
8135 name, NULL);
8136 }
8137 }
8138 ctxt->hasPErefs = 1;
8139 xmlFree(name);
8140 *str = ptr;
8141 return(entity);
8142 }
8143
8144 /**
8145 * xmlParseDocTypeDecl:
8146 * @ctxt: an XML parser context
8147 *
8148 * DEPRECATED: Internal function, don't use.
8149 *
8150 * parse a DOCTYPE declaration
8151 *
8152 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8153 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8154 *
8155 * [ VC: Root Element Type ]
8156 * The Name in the document type declaration must match the element
8157 * type of the root element.
8158 */
8159
8160 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8161 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8162 const xmlChar *name = NULL;
8163 xmlChar *ExternalID = NULL;
8164 xmlChar *URI = NULL;
8165
8166 /*
8167 * We know that '<!DOCTYPE' has been detected.
8168 */
8169 SKIP(9);
8170
8171 SKIP_BLANKS;
8172
8173 /*
8174 * Parse the DOCTYPE name.
8175 */
8176 name = xmlParseName(ctxt);
8177 if (name == NULL) {
8178 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8179 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8180 }
8181 ctxt->intSubName = name;
8182
8183 SKIP_BLANKS;
8184
8185 /*
8186 * Check for SystemID and ExternalID
8187 */
8188 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8189
8190 if ((URI != NULL) || (ExternalID != NULL)) {
8191 ctxt->hasExternalSubset = 1;
8192 }
8193 ctxt->extSubURI = URI;
8194 ctxt->extSubSystem = ExternalID;
8195
8196 SKIP_BLANKS;
8197
8198 /*
8199 * Create and update the internal subset.
8200 */
8201 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8202 (!ctxt->disableSAX))
8203 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8204
8205 /*
8206 * Is there any internal subset declarations ?
8207 * they are handled separately in xmlParseInternalSubset()
8208 */
8209 if (RAW == '[')
8210 return;
8211
8212 /*
8213 * We should be at the end of the DOCTYPE declaration.
8214 */
8215 if (RAW != '>') {
8216 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8217 }
8218 NEXT;
8219 }
8220
8221 /**
8222 * xmlParseInternalSubset:
8223 * @ctxt: an XML parser context
8224 *
8225 * parse the internal subset declaration
8226 *
8227 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8228 */
8229
8230 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8231 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8232 /*
8233 * Is there any DTD definition ?
8234 */
8235 if (RAW == '[') {
8236 int oldInputNr = ctxt->inputNr;
8237
8238 NEXT;
8239 /*
8240 * Parse the succession of Markup declarations and
8241 * PEReferences.
8242 * Subsequence (markupdecl | PEReference | S)*
8243 */
8244 SKIP_BLANKS;
8245 while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8246 (PARSER_STOPPED(ctxt) == 0)) {
8247
8248 /*
8249 * Conditional sections are allowed from external entities included
8250 * by PE References in the internal subset.
8251 */
8252 if ((PARSER_EXTERNAL(ctxt)) &&
8253 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8254 xmlParseConditionalSections(ctxt);
8255 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8256 xmlParseMarkupDecl(ctxt);
8257 } else if (RAW == '%') {
8258 xmlParsePEReference(ctxt);
8259 } else {
8260 xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8261 break;
8262 }
8263 SKIP_BLANKS_PE;
8264 SHRINK;
8265 GROW;
8266 }
8267
8268 while (ctxt->inputNr > oldInputNr)
8269 xmlPopPE(ctxt);
8270
8271 if (RAW == ']') {
8272 NEXT;
8273 SKIP_BLANKS;
8274 }
8275 }
8276
8277 /*
8278 * We should be at the end of the DOCTYPE declaration.
8279 */
8280 if ((ctxt->wellFormed) && (RAW != '>')) {
8281 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8282 return;
8283 }
8284 NEXT;
8285 }
8286
8287 #ifdef LIBXML_SAX1_ENABLED
8288 /**
8289 * xmlParseAttribute:
8290 * @ctxt: an XML parser context
8291 * @value: a xmlChar ** used to store the value of the attribute
8292 *
8293 * DEPRECATED: Internal function, don't use.
8294 *
8295 * parse an attribute
8296 *
8297 * [41] Attribute ::= Name Eq AttValue
8298 *
8299 * [ WFC: No External Entity References ]
8300 * Attribute values cannot contain direct or indirect entity references
8301 * to external entities.
8302 *
8303 * [ WFC: No < in Attribute Values ]
8304 * The replacement text of any entity referred to directly or indirectly in
8305 * an attribute value (other than "<") must not contain a <.
8306 *
8307 * [ VC: Attribute Value Type ]
8308 * The attribute must have been declared; the value must be of the type
8309 * declared for it.
8310 *
8311 * [25] Eq ::= S? '=' S?
8312 *
8313 * With namespace:
8314 *
8315 * [NS 11] Attribute ::= QName Eq AttValue
8316 *
8317 * Also the case QName == xmlns:??? is handled independently as a namespace
8318 * definition.
8319 *
8320 * Returns the attribute name, and the value in *value.
8321 */
8322
8323 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8324 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8325 const xmlChar *name;
8326 xmlChar *val;
8327
8328 *value = NULL;
8329 GROW;
8330 name = xmlParseName(ctxt);
8331 if (name == NULL) {
8332 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8333 "error parsing attribute name\n");
8334 return(NULL);
8335 }
8336
8337 /*
8338 * read the value
8339 */
8340 SKIP_BLANKS;
8341 if (RAW == '=') {
8342 NEXT;
8343 SKIP_BLANKS;
8344 val = xmlParseAttValue(ctxt);
8345 } else {
8346 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8347 "Specification mandates value for attribute %s\n", name);
8348 return(name);
8349 }
8350
8351 /*
8352 * Check that xml:lang conforms to the specification
8353 * No more registered as an error, just generate a warning now
8354 * since this was deprecated in XML second edition
8355 */
8356 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8357 if (!xmlCheckLanguageID(val)) {
8358 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8359 "Malformed value for xml:lang : %s\n",
8360 val, NULL);
8361 }
8362 }
8363
8364 /*
8365 * Check that xml:space conforms to the specification
8366 */
8367 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8368 if (xmlStrEqual(val, BAD_CAST "default"))
8369 *(ctxt->space) = 0;
8370 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8371 *(ctxt->space) = 1;
8372 else {
8373 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8374 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8375 val, NULL);
8376 }
8377 }
8378
8379 *value = val;
8380 return(name);
8381 }
8382
8383 /**
8384 * xmlParseStartTag:
8385 * @ctxt: an XML parser context
8386 *
8387 * DEPRECATED: Internal function, don't use.
8388 *
8389 * Parse a start tag. Always consumes '<'.
8390 *
8391 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8392 *
8393 * [ WFC: Unique Att Spec ]
8394 * No attribute name may appear more than once in the same start-tag or
8395 * empty-element tag.
8396 *
8397 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8398 *
8399 * [ WFC: Unique Att Spec ]
8400 * No attribute name may appear more than once in the same start-tag or
8401 * empty-element tag.
8402 *
8403 * With namespace:
8404 *
8405 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8406 *
8407 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8408 *
8409 * Returns the element name parsed
8410 */
8411
8412 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8413 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8414 const xmlChar *name;
8415 const xmlChar *attname;
8416 xmlChar *attvalue;
8417 const xmlChar **atts = ctxt->atts;
8418 int nbatts = 0;
8419 int maxatts = ctxt->maxatts;
8420 int i;
8421
8422 if (RAW != '<') return(NULL);
8423 NEXT1;
8424
8425 name = xmlParseName(ctxt);
8426 if (name == NULL) {
8427 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8428 "xmlParseStartTag: invalid element name\n");
8429 return(NULL);
8430 }
8431
8432 /*
8433 * Now parse the attributes, it ends up with the ending
8434 *
8435 * (S Attribute)* S?
8436 */
8437 SKIP_BLANKS;
8438 GROW;
8439
8440 while (((RAW != '>') &&
8441 ((RAW != '/') || (NXT(1) != '>')) &&
8442 (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8443 attname = xmlParseAttribute(ctxt, &attvalue);
8444 if (attname == NULL)
8445 break;
8446 if (attvalue != NULL) {
8447 /*
8448 * [ WFC: Unique Att Spec ]
8449 * No attribute name may appear more than once in the same
8450 * start-tag or empty-element tag.
8451 */
8452 for (i = 0; i < nbatts;i += 2) {
8453 if (xmlStrEqual(atts[i], attname)) {
8454 xmlErrAttributeDup(ctxt, NULL, attname);
8455 xmlFree(attvalue);
8456 goto failed;
8457 }
8458 }
8459 /*
8460 * Add the pair to atts
8461 */
8462 if (atts == NULL) {
8463 maxatts = 22; /* allow for 10 attrs by default */
8464 atts = (const xmlChar **)
8465 xmlMalloc(maxatts * sizeof(xmlChar *));
8466 if (atts == NULL) {
8467 xmlErrMemory(ctxt);
8468 if (attvalue != NULL)
8469 xmlFree(attvalue);
8470 goto failed;
8471 }
8472 ctxt->atts = atts;
8473 ctxt->maxatts = maxatts;
8474 } else if (nbatts + 4 > maxatts) {
8475 const xmlChar **n;
8476
8477 maxatts *= 2;
8478 n = (const xmlChar **) xmlRealloc((void *) atts,
8479 maxatts * sizeof(const xmlChar *));
8480 if (n == NULL) {
8481 xmlErrMemory(ctxt);
8482 if (attvalue != NULL)
8483 xmlFree(attvalue);
8484 goto failed;
8485 }
8486 atts = n;
8487 ctxt->atts = atts;
8488 ctxt->maxatts = maxatts;
8489 }
8490 atts[nbatts++] = attname;
8491 atts[nbatts++] = attvalue;
8492 atts[nbatts] = NULL;
8493 atts[nbatts + 1] = NULL;
8494 } else {
8495 if (attvalue != NULL)
8496 xmlFree(attvalue);
8497 }
8498
8499 failed:
8500
8501 GROW
8502 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8503 break;
8504 if (SKIP_BLANKS == 0) {
8505 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8506 "attributes construct error\n");
8507 }
8508 SHRINK;
8509 GROW;
8510 }
8511
8512 /*
8513 * SAX: Start of Element !
8514 */
8515 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8516 (!ctxt->disableSAX)) {
8517 if (nbatts > 0)
8518 ctxt->sax->startElement(ctxt->userData, name, atts);
8519 else
8520 ctxt->sax->startElement(ctxt->userData, name, NULL);
8521 }
8522
8523 if (atts != NULL) {
8524 /* Free only the content strings */
8525 for (i = 1;i < nbatts;i+=2)
8526 if (atts[i] != NULL)
8527 xmlFree((xmlChar *) atts[i]);
8528 }
8529 return(name);
8530 }
8531
8532 /**
8533 * xmlParseEndTag1:
8534 * @ctxt: an XML parser context
8535 * @line: line of the start tag
8536 * @nsNr: number of namespaces on the start tag
8537 *
8538 * Parse an end tag. Always consumes '</'.
8539 *
8540 * [42] ETag ::= '</' Name S? '>'
8541 *
8542 * With namespace
8543 *
8544 * [NS 9] ETag ::= '</' QName S? '>'
8545 */
8546
8547 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8548 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8549 const xmlChar *name;
8550
8551 GROW;
8552 if ((RAW != '<') || (NXT(1) != '/')) {
8553 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8554 "xmlParseEndTag: '</' not found\n");
8555 return;
8556 }
8557 SKIP(2);
8558
8559 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8560
8561 /*
8562 * We should definitely be at the ending "S? '>'" part
8563 */
8564 GROW;
8565 SKIP_BLANKS;
8566 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8567 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8568 } else
8569 NEXT1;
8570
8571 /*
8572 * [ WFC: Element Type Match ]
8573 * The Name in an element's end-tag must match the element type in the
8574 * start-tag.
8575 *
8576 */
8577 if (name != (xmlChar*)1) {
8578 if (name == NULL) name = BAD_CAST "unparsable";
8579 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8580 "Opening and ending tag mismatch: %s line %d and %s\n",
8581 ctxt->name, line, name);
8582 }
8583
8584 /*
8585 * SAX: End of Tag
8586 */
8587 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8588 (!ctxt->disableSAX))
8589 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8590
8591 namePop(ctxt);
8592 spacePop(ctxt);
8593 return;
8594 }
8595
8596 /**
8597 * xmlParseEndTag:
8598 * @ctxt: an XML parser context
8599 *
8600 * DEPRECATED: Internal function, don't use.
8601 *
8602 * parse an end of tag
8603 *
8604 * [42] ETag ::= '</' Name S? '>'
8605 *
8606 * With namespace
8607 *
8608 * [NS 9] ETag ::= '</' QName S? '>'
8609 */
8610
8611 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8612 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8613 xmlParseEndTag1(ctxt, 0);
8614 }
8615 #endif /* LIBXML_SAX1_ENABLED */
8616
8617 /************************************************************************
8618 * *
8619 * SAX 2 specific operations *
8620 * *
8621 ************************************************************************/
8622
8623 /**
8624 * xmlParseQNameHashed:
8625 * @ctxt: an XML parser context
8626 * @prefix: pointer to store the prefix part
8627 *
8628 * parse an XML Namespace QName
8629 *
8630 * [6] QName ::= (Prefix ':')? LocalPart
8631 * [7] Prefix ::= NCName
8632 * [8] LocalPart ::= NCName
8633 *
8634 * Returns the Name parsed or NULL
8635 */
8636
8637 static xmlHashedString
xmlParseQNameHashed(xmlParserCtxtPtr ctxt,xmlHashedString * prefix)8638 xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8639 xmlHashedString l, p;
8640 int start, isNCName = 0;
8641
8642 l.name = NULL;
8643 p.name = NULL;
8644
8645 GROW;
8646 start = CUR_PTR - BASE_PTR;
8647
8648 l = xmlParseNCName(ctxt);
8649 if (l.name != NULL) {
8650 isNCName = 1;
8651 if (CUR == ':') {
8652 NEXT;
8653 p = l;
8654 l = xmlParseNCName(ctxt);
8655 }
8656 }
8657 if ((l.name == NULL) || (CUR == ':')) {
8658 xmlChar *tmp;
8659
8660 l.name = NULL;
8661 p.name = NULL;
8662 if ((isNCName == 0) && (CUR != ':'))
8663 return(l);
8664 tmp = xmlParseNmtoken(ctxt);
8665 if (tmp != NULL)
8666 xmlFree(tmp);
8667 l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8668 CUR_PTR - (BASE_PTR + start));
8669 if (l.name == NULL) {
8670 xmlErrMemory(ctxt);
8671 return(l);
8672 }
8673 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8674 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8675 }
8676
8677 *prefix = p;
8678 return(l);
8679 }
8680
8681 /**
8682 * xmlParseQName:
8683 * @ctxt: an XML parser context
8684 * @prefix: pointer to store the prefix part
8685 *
8686 * parse an XML Namespace QName
8687 *
8688 * [6] QName ::= (Prefix ':')? LocalPart
8689 * [7] Prefix ::= NCName
8690 * [8] LocalPart ::= NCName
8691 *
8692 * Returns the Name parsed or NULL
8693 */
8694
8695 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8696 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8697 xmlHashedString n, p;
8698
8699 n = xmlParseQNameHashed(ctxt, &p);
8700 if (n.name == NULL)
8701 return(NULL);
8702 *prefix = p.name;
8703 return(n.name);
8704 }
8705
8706 /**
8707 * xmlParseQNameAndCompare:
8708 * @ctxt: an XML parser context
8709 * @name: the localname
8710 * @prefix: the prefix, if any.
8711 *
8712 * parse an XML name and compares for match
8713 * (specialized for endtag parsing)
8714 *
8715 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8716 * and the name for mismatch
8717 */
8718
8719 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8720 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8721 xmlChar const *prefix) {
8722 const xmlChar *cmp;
8723 const xmlChar *in;
8724 const xmlChar *ret;
8725 const xmlChar *prefix2;
8726
8727 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8728
8729 GROW;
8730 in = ctxt->input->cur;
8731
8732 cmp = prefix;
8733 while (*in != 0 && *in == *cmp) {
8734 ++in;
8735 ++cmp;
8736 }
8737 if ((*cmp == 0) && (*in == ':')) {
8738 in++;
8739 cmp = name;
8740 while (*in != 0 && *in == *cmp) {
8741 ++in;
8742 ++cmp;
8743 }
8744 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8745 /* success */
8746 ctxt->input->col += in - ctxt->input->cur;
8747 ctxt->input->cur = in;
8748 return((const xmlChar*) 1);
8749 }
8750 }
8751 /*
8752 * all strings coms from the dictionary, equality can be done directly
8753 */
8754 ret = xmlParseQName (ctxt, &prefix2);
8755 if (ret == NULL)
8756 return(NULL);
8757 if ((ret == name) && (prefix == prefix2))
8758 return((const xmlChar*) 1);
8759 return ret;
8760 }
8761
8762 /**
8763 * xmlParseAttribute2:
8764 * @ctxt: an XML parser context
8765 * @pref: the element prefix
8766 * @elem: the element name
8767 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8768 * @value: a xmlChar ** used to store the value of the attribute
8769 * @len: an int * to save the length of the attribute
8770 * @alloc: an int * to indicate if the attribute was allocated
8771 *
8772 * parse an attribute in the new SAX2 framework.
8773 *
8774 * Returns the attribute name, and the value in *value, .
8775 */
8776
8777 static xmlHashedString
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,xmlHashedString * hprefix,xmlChar ** value,int * len,int * alloc)8778 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8779 const xmlChar * pref, const xmlChar * elem,
8780 xmlHashedString * hprefix, xmlChar ** value,
8781 int *len, int *alloc)
8782 {
8783 xmlHashedString hname;
8784 const xmlChar *prefix, *name;
8785 xmlChar *val = NULL, *internal_val = NULL;
8786 int normalize = 0;
8787
8788 *value = NULL;
8789 GROW;
8790 hname = xmlParseQNameHashed(ctxt, hprefix);
8791 if (hname.name == NULL) {
8792 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8793 "error parsing attribute name\n");
8794 return(hname);
8795 }
8796 name = hname.name;
8797 if (hprefix->name != NULL)
8798 prefix = hprefix->name;
8799 else
8800 prefix = NULL;
8801
8802 /*
8803 * get the type if needed
8804 */
8805 if (ctxt->attsSpecial != NULL) {
8806 int type;
8807
8808 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8809 pref, elem,
8810 prefix, name);
8811 if (type != 0)
8812 normalize = 1;
8813 }
8814
8815 /*
8816 * read the value
8817 */
8818 SKIP_BLANKS;
8819 if (RAW == '=') {
8820 NEXT;
8821 SKIP_BLANKS;
8822 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8823 if (val == NULL)
8824 goto error;
8825 } else {
8826 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8827 "Specification mandates value for attribute %s\n",
8828 name);
8829 goto error;
8830 }
8831
8832 if (prefix == ctxt->str_xml) {
8833 /*
8834 * Check that xml:lang conforms to the specification
8835 * No more registered as an error, just generate a warning now
8836 * since this was deprecated in XML second edition
8837 */
8838 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8839 internal_val = xmlStrndup(val, *len);
8840 if (internal_val == NULL)
8841 goto mem_error;
8842 if (!xmlCheckLanguageID(internal_val)) {
8843 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8844 "Malformed value for xml:lang : %s\n",
8845 internal_val, NULL);
8846 }
8847 }
8848
8849 /*
8850 * Check that xml:space conforms to the specification
8851 */
8852 if (xmlStrEqual(name, BAD_CAST "space")) {
8853 internal_val = xmlStrndup(val, *len);
8854 if (internal_val == NULL)
8855 goto mem_error;
8856 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8857 *(ctxt->space) = 0;
8858 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8859 *(ctxt->space) = 1;
8860 else {
8861 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8862 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8863 internal_val, NULL);
8864 }
8865 }
8866 if (internal_val) {
8867 xmlFree(internal_val);
8868 }
8869 }
8870
8871 *value = val;
8872 return (hname);
8873
8874 mem_error:
8875 xmlErrMemory(ctxt);
8876 error:
8877 if ((val != NULL) && (*alloc != 0))
8878 xmlFree(val);
8879 return(hname);
8880 }
8881
8882 /**
8883 * xmlAttrHashInsert:
8884 * @ctxt: parser context
8885 * @size: size of the hash table
8886 * @name: attribute name
8887 * @uri: namespace uri
8888 * @hashValue: combined hash value of name and uri
8889 * @aindex: attribute index (this is a multiple of 5)
8890 *
8891 * Inserts a new attribute into the hash table.
8892 *
8893 * Returns INT_MAX if no existing attribute was found, the attribute
8894 * index if an attribute was found, -1 if a memory allocation failed.
8895 */
8896 static int
xmlAttrHashInsert(xmlParserCtxtPtr ctxt,unsigned size,const xmlChar * name,const xmlChar * uri,unsigned hashValue,int aindex)8897 xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8898 const xmlChar *uri, unsigned hashValue, int aindex) {
8899 xmlAttrHashBucket *table = ctxt->attrHash;
8900 xmlAttrHashBucket *bucket;
8901 unsigned hindex;
8902
8903 hindex = hashValue & (size - 1);
8904 bucket = &table[hindex];
8905
8906 while (bucket->index >= 0) {
8907 const xmlChar **atts = &ctxt->atts[bucket->index];
8908
8909 if (name == atts[0]) {
8910 int nsIndex = (int) (ptrdiff_t) atts[2];
8911
8912 if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8913 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
8914 (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8915 return(bucket->index);
8916 }
8917
8918 hindex++;
8919 bucket++;
8920 if (hindex >= size) {
8921 hindex = 0;
8922 bucket = table;
8923 }
8924 }
8925
8926 bucket->index = aindex;
8927
8928 return(INT_MAX);
8929 }
8930
8931 /**
8932 * xmlParseStartTag2:
8933 * @ctxt: an XML parser context
8934 *
8935 * Parse a start tag. Always consumes '<'.
8936 *
8937 * This routine is called when running SAX2 parsing
8938 *
8939 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8940 *
8941 * [ WFC: Unique Att Spec ]
8942 * No attribute name may appear more than once in the same start-tag or
8943 * empty-element tag.
8944 *
8945 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8946 *
8947 * [ WFC: Unique Att Spec ]
8948 * No attribute name may appear more than once in the same start-tag or
8949 * empty-element tag.
8950 *
8951 * With namespace:
8952 *
8953 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8954 *
8955 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8956 *
8957 * Returns the element name parsed
8958 */
8959
8960 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * nbNsPtr)8961 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8962 const xmlChar **URI, int *nbNsPtr) {
8963 xmlHashedString hlocalname;
8964 xmlHashedString hprefix;
8965 xmlHashedString hattname;
8966 xmlHashedString haprefix;
8967 const xmlChar *localname;
8968 const xmlChar *prefix;
8969 const xmlChar *attname;
8970 const xmlChar *aprefix;
8971 const xmlChar *uri;
8972 xmlChar *attvalue = NULL;
8973 const xmlChar **atts = ctxt->atts;
8974 unsigned attrHashSize = 0;
8975 int maxatts = ctxt->maxatts;
8976 int nratts, nbatts, nbdef;
8977 int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8978 int alloc = 0;
8979
8980 if (RAW != '<') return(NULL);
8981 NEXT1;
8982
8983 nbatts = 0;
8984 nratts = 0;
8985 nbdef = 0;
8986 nbNs = 0;
8987 nbTotalDef = 0;
8988 attval = 0;
8989
8990 if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8991 xmlErrMemory(ctxt);
8992 return(NULL);
8993 }
8994
8995 hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8996 if (hlocalname.name == NULL) {
8997 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8998 "StartTag: invalid element name\n");
8999 return(NULL);
9000 }
9001 localname = hlocalname.name;
9002 prefix = hprefix.name;
9003
9004 /*
9005 * Now parse the attributes, it ends up with the ending
9006 *
9007 * (S Attribute)* S?
9008 */
9009 SKIP_BLANKS;
9010 GROW;
9011
9012 /*
9013 * The ctxt->atts array will be ultimately passed to the SAX callback
9014 * containing five xmlChar pointers for each attribute:
9015 *
9016 * [0] attribute name
9017 * [1] attribute prefix
9018 * [2] namespace URI
9019 * [3] attribute value
9020 * [4] end of attribute value
9021 *
9022 * To save memory, we reuse this array temporarily and store integers
9023 * in these pointer variables.
9024 *
9025 * [0] attribute name
9026 * [1] attribute prefix
9027 * [2] hash value of attribute prefix, and later namespace index
9028 * [3] for non-allocated values: ptrdiff_t offset into input buffer
9029 * [4] for non-allocated values: ptrdiff_t offset into input buffer
9030 *
9031 * The ctxt->attallocs array contains an additional unsigned int for
9032 * each attribute, containing the hash value of the attribute name
9033 * and the alloc flag in bit 31.
9034 */
9035
9036 while (((RAW != '>') &&
9037 ((RAW != '/') || (NXT(1) != '>')) &&
9038 (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9039 int len = -1;
9040
9041 hattname = xmlParseAttribute2(ctxt, prefix, localname,
9042 &haprefix, &attvalue, &len,
9043 &alloc);
9044 if (hattname.name == NULL)
9045 break;
9046 if (attvalue == NULL)
9047 goto next_attr;
9048 attname = hattname.name;
9049 aprefix = haprefix.name;
9050 if (len < 0) len = xmlStrlen(attvalue);
9051
9052 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9053 xmlHashedString huri;
9054 xmlURIPtr parsedUri;
9055
9056 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9057 uri = huri.name;
9058 if (uri == NULL) {
9059 xmlErrMemory(ctxt);
9060 goto next_attr;
9061 }
9062 if (*uri != 0) {
9063 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9064 xmlErrMemory(ctxt);
9065 goto next_attr;
9066 }
9067 if (parsedUri == NULL) {
9068 xmlNsErr(ctxt, XML_WAR_NS_URI,
9069 "xmlns: '%s' is not a valid URI\n",
9070 uri, NULL, NULL);
9071 } else {
9072 if (parsedUri->scheme == NULL) {
9073 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9074 "xmlns: URI %s is not absolute\n",
9075 uri, NULL, NULL);
9076 }
9077 xmlFreeURI(parsedUri);
9078 }
9079 if (uri == ctxt->str_xml_ns) {
9080 if (attname != ctxt->str_xml) {
9081 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9082 "xml namespace URI cannot be the default namespace\n",
9083 NULL, NULL, NULL);
9084 }
9085 goto next_attr;
9086 }
9087 if ((len == 29) &&
9088 (xmlStrEqual(uri,
9089 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9090 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9091 "reuse of the xmlns namespace name is forbidden\n",
9092 NULL, NULL, NULL);
9093 goto next_attr;
9094 }
9095 }
9096
9097 if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9098 nbNs++;
9099 } else if (aprefix == ctxt->str_xmlns) {
9100 xmlHashedString huri;
9101 xmlURIPtr parsedUri;
9102
9103 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9104 uri = huri.name;
9105 if (uri == NULL) {
9106 xmlErrMemory(ctxt);
9107 goto next_attr;
9108 }
9109
9110 if (attname == ctxt->str_xml) {
9111 if (uri != ctxt->str_xml_ns) {
9112 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9113 "xml namespace prefix mapped to wrong URI\n",
9114 NULL, NULL, NULL);
9115 }
9116 /*
9117 * Do not keep a namespace definition node
9118 */
9119 goto next_attr;
9120 }
9121 if (uri == ctxt->str_xml_ns) {
9122 if (attname != ctxt->str_xml) {
9123 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9124 "xml namespace URI mapped to wrong prefix\n",
9125 NULL, NULL, NULL);
9126 }
9127 goto next_attr;
9128 }
9129 if (attname == ctxt->str_xmlns) {
9130 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9131 "redefinition of the xmlns prefix is forbidden\n",
9132 NULL, NULL, NULL);
9133 goto next_attr;
9134 }
9135 if ((len == 29) &&
9136 (xmlStrEqual(uri,
9137 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9138 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139 "reuse of the xmlns namespace name is forbidden\n",
9140 NULL, NULL, NULL);
9141 goto next_attr;
9142 }
9143 if ((uri == NULL) || (uri[0] == 0)) {
9144 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9145 "xmlns:%s: Empty XML namespace is not allowed\n",
9146 attname, NULL, NULL);
9147 goto next_attr;
9148 } else {
9149 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9150 xmlErrMemory(ctxt);
9151 goto next_attr;
9152 }
9153 if (parsedUri == NULL) {
9154 xmlNsErr(ctxt, XML_WAR_NS_URI,
9155 "xmlns:%s: '%s' is not a valid URI\n",
9156 attname, uri, NULL);
9157 } else {
9158 if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9159 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9160 "xmlns:%s: URI %s is not absolute\n",
9161 attname, uri, NULL);
9162 }
9163 xmlFreeURI(parsedUri);
9164 }
9165 }
9166
9167 if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9168 nbNs++;
9169 } else {
9170 /*
9171 * Populate attributes array, see above for repurposing
9172 * of xmlChar pointers.
9173 */
9174 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9175 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9176 goto next_attr;
9177 }
9178 maxatts = ctxt->maxatts;
9179 atts = ctxt->atts;
9180 }
9181 ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9182 ((unsigned) alloc << 31);
9183 atts[nbatts++] = attname;
9184 atts[nbatts++] = aprefix;
9185 atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9186 if (alloc) {
9187 atts[nbatts++] = attvalue;
9188 attvalue += len;
9189 atts[nbatts++] = attvalue;
9190 } else {
9191 /*
9192 * attvalue points into the input buffer which can be
9193 * reallocated. Store differences to input->base instead.
9194 * The pointers will be reconstructed later.
9195 */
9196 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9197 attvalue += len;
9198 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9199 }
9200 /*
9201 * tag if some deallocation is needed
9202 */
9203 if (alloc != 0) attval = 1;
9204 attvalue = NULL; /* moved into atts */
9205 }
9206
9207 next_attr:
9208 if ((attvalue != NULL) && (alloc != 0)) {
9209 xmlFree(attvalue);
9210 attvalue = NULL;
9211 }
9212
9213 GROW
9214 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9215 break;
9216 if (SKIP_BLANKS == 0) {
9217 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9218 "attributes construct error\n");
9219 break;
9220 }
9221 GROW;
9222 }
9223
9224 /*
9225 * Namespaces from default attributes
9226 */
9227 if (ctxt->attsDefault != NULL) {
9228 xmlDefAttrsPtr defaults;
9229
9230 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9231 if (defaults != NULL) {
9232 for (i = 0; i < defaults->nbAttrs; i++) {
9233 xmlDefAttr *attr = &defaults->attrs[i];
9234
9235 attname = attr->name.name;
9236 aprefix = attr->prefix.name;
9237
9238 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9239 xmlParserEntityCheck(ctxt, attr->expandedSize);
9240
9241 if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9242 nbNs++;
9243 } else if (aprefix == ctxt->str_xmlns) {
9244 xmlParserEntityCheck(ctxt, attr->expandedSize);
9245
9246 if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9247 NULL, 1) > 0)
9248 nbNs++;
9249 } else {
9250 nbTotalDef += 1;
9251 }
9252 }
9253 }
9254 }
9255
9256 /*
9257 * Resolve attribute namespaces
9258 */
9259 for (i = 0; i < nbatts; i += 5) {
9260 attname = atts[i];
9261 aprefix = atts[i+1];
9262
9263 /*
9264 * The default namespace does not apply to attribute names.
9265 */
9266 if (aprefix == NULL) {
9267 nsIndex = NS_INDEX_EMPTY;
9268 } else if (aprefix == ctxt->str_xml) {
9269 nsIndex = NS_INDEX_XML;
9270 } else {
9271 haprefix.name = aprefix;
9272 haprefix.hashValue = (size_t) atts[i+2];
9273 nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9274
9275 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9276 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9277 "Namespace prefix %s for %s on %s is not defined\n",
9278 aprefix, attname, localname);
9279 nsIndex = NS_INDEX_EMPTY;
9280 }
9281 }
9282
9283 atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9284 }
9285
9286 /*
9287 * Maximum number of attributes including default attributes.
9288 */
9289 maxAtts = nratts + nbTotalDef;
9290
9291 /*
9292 * Verify that attribute names are unique.
9293 */
9294 if (maxAtts > 1) {
9295 attrHashSize = 4;
9296 while (attrHashSize / 2 < (unsigned) maxAtts)
9297 attrHashSize *= 2;
9298
9299 if (attrHashSize > ctxt->attrHashMax) {
9300 xmlAttrHashBucket *tmp;
9301
9302 tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9303 if (tmp == NULL) {
9304 xmlErrMemory(ctxt);
9305 goto done;
9306 }
9307
9308 ctxt->attrHash = tmp;
9309 ctxt->attrHashMax = attrHashSize;
9310 }
9311
9312 memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9313
9314 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9315 const xmlChar *nsuri;
9316 unsigned hashValue, nameHashValue, uriHashValue;
9317 int res;
9318
9319 attname = atts[i];
9320 aprefix = atts[i+1];
9321 nsIndex = (ptrdiff_t) atts[i+2];
9322 /* Hash values always have bit 31 set, see dict.c */
9323 nameHashValue = ctxt->attallocs[j] | 0x80000000;
9324
9325 if (nsIndex == NS_INDEX_EMPTY) {
9326 /*
9327 * Prefix with empty namespace means an undeclared
9328 * prefix which was already reported above.
9329 */
9330 if (aprefix != NULL)
9331 continue;
9332 nsuri = NULL;
9333 uriHashValue = URI_HASH_EMPTY;
9334 } else if (nsIndex == NS_INDEX_XML) {
9335 nsuri = ctxt->str_xml_ns;
9336 uriHashValue = URI_HASH_XML;
9337 } else {
9338 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9339 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9340 }
9341
9342 hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9343 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9344 hashValue, i);
9345 if (res < 0)
9346 continue;
9347
9348 /*
9349 * [ WFC: Unique Att Spec ]
9350 * No attribute name may appear more than once in the same
9351 * start-tag or empty-element tag.
9352 * As extended by the Namespace in XML REC.
9353 */
9354 if (res < INT_MAX) {
9355 if (aprefix == atts[res+1]) {
9356 xmlErrAttributeDup(ctxt, aprefix, attname);
9357 } else {
9358 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9359 "Namespaced Attribute %s in '%s' redefined\n",
9360 attname, nsuri, NULL);
9361 }
9362 }
9363 }
9364 }
9365
9366 /*
9367 * Default attributes
9368 */
9369 if (ctxt->attsDefault != NULL) {
9370 xmlDefAttrsPtr defaults;
9371
9372 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9373 if (defaults != NULL) {
9374 for (i = 0; i < defaults->nbAttrs; i++) {
9375 xmlDefAttr *attr = &defaults->attrs[i];
9376 const xmlChar *nsuri;
9377 unsigned hashValue, uriHashValue;
9378 int res;
9379
9380 attname = attr->name.name;
9381 aprefix = attr->prefix.name;
9382
9383 if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9384 continue;
9385 if (aprefix == ctxt->str_xmlns)
9386 continue;
9387
9388 if (aprefix == NULL) {
9389 nsIndex = NS_INDEX_EMPTY;
9390 nsuri = NULL;
9391 uriHashValue = URI_HASH_EMPTY;
9392 } if (aprefix == ctxt->str_xml) {
9393 nsIndex = NS_INDEX_XML;
9394 nsuri = ctxt->str_xml_ns;
9395 uriHashValue = URI_HASH_XML;
9396 } else if (aprefix != NULL) {
9397 nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9398 if ((nsIndex == INT_MAX) ||
9399 (nsIndex < ctxt->nsdb->minNsIndex)) {
9400 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9401 "Namespace prefix %s for %s on %s is not "
9402 "defined\n",
9403 aprefix, attname, localname);
9404 nsIndex = NS_INDEX_EMPTY;
9405 nsuri = NULL;
9406 uriHashValue = URI_HASH_EMPTY;
9407 } else {
9408 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9409 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9410 }
9411 }
9412
9413 /*
9414 * Check whether the attribute exists
9415 */
9416 if (maxAtts > 1) {
9417 hashValue = xmlDictCombineHash(attr->name.hashValue,
9418 uriHashValue);
9419 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9420 hashValue, nbatts);
9421 if (res < 0)
9422 continue;
9423 if (res < INT_MAX) {
9424 if (aprefix == atts[res+1])
9425 continue;
9426 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9427 "Namespaced Attribute %s in '%s' redefined\n",
9428 attname, nsuri, NULL);
9429 }
9430 }
9431
9432 xmlParserEntityCheck(ctxt, attr->expandedSize);
9433
9434 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9435 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9436 localname = NULL;
9437 goto done;
9438 }
9439 maxatts = ctxt->maxatts;
9440 atts = ctxt->atts;
9441 }
9442
9443 atts[nbatts++] = attname;
9444 atts[nbatts++] = aprefix;
9445 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9446 atts[nbatts++] = attr->value.name;
9447 atts[nbatts++] = attr->valueEnd;
9448 if ((ctxt->standalone == 1) && (attr->external != 0)) {
9449 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9450 "standalone: attribute %s on %s defaulted "
9451 "from external subset\n",
9452 attname, localname);
9453 }
9454 nbdef++;
9455 }
9456 }
9457 }
9458
9459 /*
9460 * Reconstruct attribute pointers
9461 */
9462 for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9463 /* namespace URI */
9464 nsIndex = (ptrdiff_t) atts[i+2];
9465 if (nsIndex == INT_MAX)
9466 atts[i+2] = NULL;
9467 else if (nsIndex == INT_MAX - 1)
9468 atts[i+2] = ctxt->str_xml_ns;
9469 else
9470 atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9471
9472 if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9473 atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3]; /* value */
9474 atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4]; /* valuend */
9475 }
9476 }
9477
9478 uri = xmlParserNsLookupUri(ctxt, &hprefix);
9479 if ((prefix != NULL) && (uri == NULL)) {
9480 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9481 "Namespace prefix %s on %s is not defined\n",
9482 prefix, localname, NULL);
9483 }
9484 *pref = prefix;
9485 *URI = uri;
9486
9487 /*
9488 * SAX callback
9489 */
9490 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9491 (!ctxt->disableSAX)) {
9492 if (nbNs > 0)
9493 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9494 nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9495 nbatts / 5, nbdef, atts);
9496 else
9497 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9498 0, NULL, nbatts / 5, nbdef, atts);
9499 }
9500
9501 done:
9502 /*
9503 * Free allocated attribute values
9504 */
9505 if (attval != 0) {
9506 for (i = 0, j = 0; j < nratts; i += 5, j++)
9507 if (ctxt->attallocs[j] & 0x80000000)
9508 xmlFree((xmlChar *) atts[i+3]);
9509 }
9510
9511 *nbNsPtr = nbNs;
9512 return(localname);
9513 }
9514
9515 /**
9516 * xmlParseEndTag2:
9517 * @ctxt: an XML parser context
 * @tag: the corresponding start tag (prefix, URI, line number and
 *       number of namespaces declared on it)
9520 *
9521 * Parse an end tag. Always consumes '</'.
9522 *
9523 * [42] ETag ::= '</' Name S? '>'
9524 *
9525 * With namespace
9526 *
9527 * [NS 9] ETag ::= '</' QName S? '>'
9528 */
9529
9530 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9531 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9532 const xmlChar *name;
9533
9534 GROW;
9535 if ((RAW != '<') || (NXT(1) != '/')) {
9536 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9537 return;
9538 }
9539 SKIP(2);
9540
9541 if (tag->prefix == NULL)
9542 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9543 else
9544 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9545
9546 /*
9547 * We should definitely be at the ending "S? '>'" part
9548 */
9549 GROW;
9550 SKIP_BLANKS;
9551 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9552 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9553 } else
9554 NEXT1;
9555
9556 /*
9557 * [ WFC: Element Type Match ]
9558 * The Name in an element's end-tag must match the element type in the
9559 * start-tag.
9560 *
9561 */
9562 if (name != (xmlChar*)1) {
9563 if (name == NULL) name = BAD_CAST "unparsable";
9564 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9565 "Opening and ending tag mismatch: %s line %d and %s\n",
9566 ctxt->name, tag->line, name);
9567 }
9568
9569 /*
9570 * SAX: End of Tag
9571 */
9572 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9573 (!ctxt->disableSAX))
9574 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9575 tag->URI);
9576
9577 spacePop(ctxt);
9578 if (tag->nsNr != 0)
9579 xmlParserNsPop(ctxt, tag->nsNr);
9580 }
9581
9582 /**
9583 * xmlParseCDSect:
9584 * @ctxt: an XML parser context
9585 *
9586 * DEPRECATED: Internal function, don't use.
9587 *
9588 * Parse escaped pure raw content. Always consumes '<!['.
9589 *
9590 * [18] CDSect ::= CDStart CData CDEnd
9591 *
9592 * [19] CDStart ::= '<![CDATA['
9593 *
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9595 *
9596 * [21] CDEnd ::= ']]>'
9597 */
9598 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9599 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9600 xmlChar *buf = NULL;
9601 int len = 0;
9602 int size = XML_PARSER_BUFFER_SIZE;
9603 int r, rl;
9604 int s, sl;
9605 int cur, l;
9606 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9607 XML_MAX_HUGE_LENGTH :
9608 XML_MAX_TEXT_LENGTH;
9609
9610 if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9611 return;
9612 SKIP(3);
9613
9614 if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9615 return;
9616 SKIP(6);
9617
9618 r = CUR_CHAR(rl);
9619 if (!IS_CHAR(r)) {
9620 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9621 goto out;
9622 }
9623 NEXTL(rl);
9624 s = CUR_CHAR(sl);
9625 if (!IS_CHAR(s)) {
9626 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9627 goto out;
9628 }
9629 NEXTL(sl);
9630 cur = CUR_CHAR(l);
9631 buf = (xmlChar *) xmlMallocAtomic(size);
9632 if (buf == NULL) {
9633 xmlErrMemory(ctxt);
9634 goto out;
9635 }
9636 while (IS_CHAR(cur) &&
9637 ((r != ']') || (s != ']') || (cur != '>'))) {
9638 if (len + 5 >= size) {
9639 xmlChar *tmp;
9640
9641 tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9642 if (tmp == NULL) {
9643 xmlErrMemory(ctxt);
9644 goto out;
9645 }
9646 buf = tmp;
9647 size *= 2;
9648 }
9649 COPY_BUF(buf, len, r);
9650 if (len > maxLength) {
9651 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9652 "CData section too big found\n");
9653 goto out;
9654 }
9655 r = s;
9656 rl = sl;
9657 s = cur;
9658 sl = l;
9659 NEXTL(l);
9660 cur = CUR_CHAR(l);
9661 }
9662 buf[len] = 0;
9663 if (cur != '>') {
9664 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9665 "CData section not finished\n%.50s\n", buf);
9666 goto out;
9667 }
9668 NEXTL(l);
9669
9670 /*
9671 * OK the buffer is to be consumed as cdata.
9672 */
9673 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9674 if (ctxt->sax->cdataBlock != NULL)
9675 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9676 else if (ctxt->sax->characters != NULL)
9677 ctxt->sax->characters(ctxt->userData, buf, len);
9678 }
9679
9680 out:
9681 xmlFree(buf);
9682 }
9683
9684 /**
9685 * xmlParseContentInternal:
9686 * @ctxt: an XML parser context
9687 *
9688 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9689 * unexpected EOF to the caller.
9690 */
9691
9692 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9693 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9694 int oldNameNr = ctxt->nameNr;
9695 int oldSpaceNr = ctxt->spaceNr;
9696 int oldNodeNr = ctxt->nodeNr;
9697
9698 GROW;
9699 while ((ctxt->input->cur < ctxt->input->end) &&
9700 (PARSER_STOPPED(ctxt) == 0)) {
9701 const xmlChar *cur = ctxt->input->cur;
9702
9703 /*
9704 * First case : a Processing Instruction.
9705 */
9706 if ((*cur == '<') && (cur[1] == '?')) {
9707 xmlParsePI(ctxt);
9708 }
9709
9710 /*
9711 * Second case : a CDSection
9712 */
9713 /* 2.6.0 test was *cur not RAW */
9714 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9715 xmlParseCDSect(ctxt);
9716 }
9717
9718 /*
9719 * Third case : a comment
9720 */
9721 else if ((*cur == '<') && (NXT(1) == '!') &&
9722 (NXT(2) == '-') && (NXT(3) == '-')) {
9723 xmlParseComment(ctxt);
9724 }
9725
9726 /*
9727 * Fourth case : a sub-element.
9728 */
9729 else if (*cur == '<') {
9730 if (NXT(1) == '/') {
9731 if (ctxt->nameNr <= oldNameNr)
9732 break;
9733 xmlParseElementEnd(ctxt);
9734 } else {
9735 xmlParseElementStart(ctxt);
9736 }
9737 }
9738
9739 /*
9740 * Fifth case : a reference. If if has not been resolved,
9741 * parsing returns it's Name, create the node
9742 */
9743
9744 else if (*cur == '&') {
9745 xmlParseReference(ctxt);
9746 }
9747
9748 /*
9749 * Last case, text. Note that References are handled directly.
9750 */
9751 else {
9752 xmlParseCharDataInternal(ctxt, 0);
9753 }
9754
9755 SHRINK;
9756 GROW;
9757 }
9758
9759 if ((ctxt->nameNr > oldNameNr) &&
9760 (ctxt->input->cur >= ctxt->input->end) &&
9761 (ctxt->wellFormed)) {
9762 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9763 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9764 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9765 "Premature end of data in tag %s line %d\n",
9766 name, line, NULL);
9767 }
9768
9769 /*
9770 * Clean up in error case
9771 */
9772
9773 while (ctxt->nodeNr > oldNodeNr)
9774 nodePop(ctxt);
9775
9776 while (ctxt->nameNr > oldNameNr) {
9777 xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9778
9779 if (tag->nsNr != 0)
9780 xmlParserNsPop(ctxt, tag->nsNr);
9781
9782 namePop(ctxt);
9783 }
9784
9785 while (ctxt->spaceNr > oldSpaceNr)
9786 spacePop(ctxt);
9787 }
9788
9789 /**
9790 * xmlParseContent:
9791 * @ctxt: an XML parser context
9792 *
9793 * Parse XML element content. This is useful if you're only interested
9794 * in custom SAX callbacks. If you want a node list, use
9795 * xmlParseInNodeContext.
9796 */
9797 void
xmlParseContent(xmlParserCtxtPtr ctxt)9798 xmlParseContent(xmlParserCtxtPtr ctxt) {
9799 if ((ctxt == NULL) || (ctxt->input == NULL))
9800 return;
9801
9802 xmlCtxtInitializeLate(ctxt);
9803
9804 xmlParseContentInternal(ctxt);
9805
9806 if (ctxt->input->cur < ctxt->input->end)
9807 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9808 }
9809
9810 /**
9811 * xmlParseElement:
9812 * @ctxt: an XML parser context
9813 *
9814 * DEPRECATED: Internal function, don't use.
9815 *
9816 * parse an XML element
9817 *
9818 * [39] element ::= EmptyElemTag | STag content ETag
9819 *
9820 * [ WFC: Element Type Match ]
9821 * The Name in an element's end-tag must match the element type in the
9822 * start-tag.
9823 *
9824 */
9825
9826 void
xmlParseElement(xmlParserCtxtPtr ctxt)9827 xmlParseElement(xmlParserCtxtPtr ctxt) {
9828 if (xmlParseElementStart(ctxt) != 0)
9829 return;
9830
9831 xmlParseContentInternal(ctxt);
9832
9833 if (ctxt->input->cur >= ctxt->input->end) {
9834 if (ctxt->wellFormed) {
9835 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9836 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9837 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9838 "Premature end of data in tag %s line %d\n",
9839 name, line, NULL);
9840 }
9841 return;
9842 }
9843
9844 xmlParseElementEnd(ctxt);
9845 }
9846
9847 /**
9848 * xmlParseElementStart:
9849 * @ctxt: an XML parser context
9850 *
9851 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9852 * opening tag was parsed, 1 if an empty element was parsed.
9853 *
9854 * Always consumes '<'.
9855 */
9856 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)9857 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9858 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9859 const xmlChar *name;
9860 const xmlChar *prefix = NULL;
9861 const xmlChar *URI = NULL;
9862 xmlParserNodeInfo node_info;
9863 int line;
9864 xmlNodePtr cur;
9865 int nbNs = 0;
9866
9867 if (ctxt->nameNr > maxDepth) {
9868 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9869 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9870 ctxt->nameNr);
9871 xmlHaltParser(ctxt);
9872 return(-1);
9873 }
9874
9875 /* Capture start position */
9876 if (ctxt->record_info) {
9877 node_info.begin_pos = ctxt->input->consumed +
9878 (CUR_PTR - ctxt->input->base);
9879 node_info.begin_line = ctxt->input->line;
9880 }
9881
9882 if (ctxt->spaceNr == 0)
9883 spacePush(ctxt, -1);
9884 else if (*ctxt->space == -2)
9885 spacePush(ctxt, -1);
9886 else
9887 spacePush(ctxt, *ctxt->space);
9888
9889 line = ctxt->input->line;
9890 #ifdef LIBXML_SAX1_ENABLED
9891 if (ctxt->sax2)
9892 #endif /* LIBXML_SAX1_ENABLED */
9893 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9894 #ifdef LIBXML_SAX1_ENABLED
9895 else
9896 name = xmlParseStartTag(ctxt);
9897 #endif /* LIBXML_SAX1_ENABLED */
9898 if (name == NULL) {
9899 spacePop(ctxt);
9900 return(-1);
9901 }
9902 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9903 cur = ctxt->node;
9904
9905 #ifdef LIBXML_VALID_ENABLED
9906 /*
9907 * [ VC: Root Element Type ]
9908 * The Name in the document type declaration must match the element
9909 * type of the root element.
9910 */
9911 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9912 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9913 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9914 #endif /* LIBXML_VALID_ENABLED */
9915
9916 /*
9917 * Check for an Empty Element.
9918 */
9919 if ((RAW == '/') && (NXT(1) == '>')) {
9920 SKIP(2);
9921 if (ctxt->sax2) {
9922 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9923 (!ctxt->disableSAX))
9924 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9925 #ifdef LIBXML_SAX1_ENABLED
9926 } else {
9927 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9928 (!ctxt->disableSAX))
9929 ctxt->sax->endElement(ctxt->userData, name);
9930 #endif /* LIBXML_SAX1_ENABLED */
9931 }
9932 namePop(ctxt);
9933 spacePop(ctxt);
9934 if (nbNs > 0)
9935 xmlParserNsPop(ctxt, nbNs);
9936 if (cur != NULL && ctxt->record_info) {
9937 node_info.node = cur;
9938 node_info.end_pos = ctxt->input->consumed +
9939 (CUR_PTR - ctxt->input->base);
9940 node_info.end_line = ctxt->input->line;
9941 xmlParserAddNodeInfo(ctxt, &node_info);
9942 }
9943 return(1);
9944 }
9945 if (RAW == '>') {
9946 NEXT1;
9947 if (cur != NULL && ctxt->record_info) {
9948 node_info.node = cur;
9949 node_info.end_pos = 0;
9950 node_info.end_line = 0;
9951 xmlParserAddNodeInfo(ctxt, &node_info);
9952 }
9953 } else {
9954 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9955 "Couldn't find end of Start Tag %s line %d\n",
9956 name, line, NULL);
9957
9958 /*
9959 * end of parsing of this node.
9960 */
9961 nodePop(ctxt);
9962 namePop(ctxt);
9963 spacePop(ctxt);
9964 if (nbNs > 0)
9965 xmlParserNsPop(ctxt, nbNs);
9966 return(-1);
9967 }
9968
9969 return(0);
9970 }
9971
9972 /**
9973 * xmlParseElementEnd:
9974 * @ctxt: an XML parser context
9975 *
9976 * Parse the end of an XML element. Always consumes '</'.
9977 */
9978 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)9979 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9980 xmlNodePtr cur = ctxt->node;
9981
9982 if (ctxt->nameNr <= 0) {
9983 if ((RAW == '<') && (NXT(1) == '/'))
9984 SKIP(2);
9985 return;
9986 }
9987
9988 /*
9989 * parse the end of tag: '</' should be here.
9990 */
9991 if (ctxt->sax2) {
9992 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9993 namePop(ctxt);
9994 }
9995 #ifdef LIBXML_SAX1_ENABLED
9996 else
9997 xmlParseEndTag1(ctxt, 0);
9998 #endif /* LIBXML_SAX1_ENABLED */
9999
10000 /*
10001 * Capture end position
10002 */
10003 if (cur != NULL && ctxt->record_info) {
10004 xmlParserNodeInfoPtr node_info;
10005
10006 node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10007 if (node_info != NULL) {
10008 node_info->end_pos = ctxt->input->consumed +
10009 (CUR_PTR - ctxt->input->base);
10010 node_info->end_line = ctxt->input->line;
10011 }
10012 }
10013 }
10014
10015 /**
10016 * xmlParseVersionNum:
10017 * @ctxt: an XML parser context
10018 *
10019 * DEPRECATED: Internal function, don't use.
10020 *
10021 * parse the XML version value.
10022 *
10023 * [26] VersionNum ::= '1.' [0-9]+
10024 *
10025 * In practice allow [0-9].[0-9]+ at that level
10026 *
10027 * Returns the string giving the XML version number, or NULL
10028 */
10029 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10030 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10031 xmlChar *buf = NULL;
10032 int len = 0;
10033 int size = 10;
10034 xmlChar cur;
10035
10036 buf = (xmlChar *) xmlMallocAtomic(size);
10037 if (buf == NULL) {
10038 xmlErrMemory(ctxt);
10039 return(NULL);
10040 }
10041 cur = CUR;
10042 if (!((cur >= '0') && (cur <= '9'))) {
10043 xmlFree(buf);
10044 return(NULL);
10045 }
10046 buf[len++] = cur;
10047 NEXT;
10048 cur=CUR;
10049 if (cur != '.') {
10050 xmlFree(buf);
10051 return(NULL);
10052 }
10053 buf[len++] = cur;
10054 NEXT;
10055 cur=CUR;
10056 while ((cur >= '0') && (cur <= '9')) {
10057 if (len + 1 >= size) {
10058 xmlChar *tmp;
10059
10060 size *= 2;
10061 tmp = (xmlChar *) xmlRealloc(buf, size);
10062 if (tmp == NULL) {
10063 xmlFree(buf);
10064 xmlErrMemory(ctxt);
10065 return(NULL);
10066 }
10067 buf = tmp;
10068 }
10069 buf[len++] = cur;
10070 NEXT;
10071 cur=CUR;
10072 }
10073 buf[len] = 0;
10074 return(buf);
10075 }
10076
10077 /**
10078 * xmlParseVersionInfo:
10079 * @ctxt: an XML parser context
10080 *
10081 * DEPRECATED: Internal function, don't use.
10082 *
10083 * parse the XML version.
10084 *
10085 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10086 *
10087 * [25] Eq ::= S? '=' S?
10088 *
10089 * Returns the version string, e.g. "1.0"
10090 */
10091
10092 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10093 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10094 xmlChar *version = NULL;
10095
10096 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10097 SKIP(7);
10098 SKIP_BLANKS;
10099 if (RAW != '=') {
10100 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10101 return(NULL);
10102 }
10103 NEXT;
10104 SKIP_BLANKS;
10105 if (RAW == '"') {
10106 NEXT;
10107 version = xmlParseVersionNum(ctxt);
10108 if (RAW != '"') {
10109 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110 } else
10111 NEXT;
10112 } else if (RAW == '\''){
10113 NEXT;
10114 version = xmlParseVersionNum(ctxt);
10115 if (RAW != '\'') {
10116 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10117 } else
10118 NEXT;
10119 } else {
10120 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10121 }
10122 }
10123 return(version);
10124 }
10125
10126 /**
10127 * xmlParseEncName:
10128 * @ctxt: an XML parser context
10129 *
10130 * DEPRECATED: Internal function, don't use.
10131 *
10132 * parse the XML encoding name
10133 *
10134 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10135 *
10136 * Returns the encoding name value or NULL
10137 */
10138 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10139 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10140 xmlChar *buf = NULL;
10141 int len = 0;
10142 int size = 10;
10143 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10144 XML_MAX_TEXT_LENGTH :
10145 XML_MAX_NAME_LENGTH;
10146 xmlChar cur;
10147
10148 cur = CUR;
10149 if (((cur >= 'a') && (cur <= 'z')) ||
10150 ((cur >= 'A') && (cur <= 'Z'))) {
10151 buf = (xmlChar *) xmlMallocAtomic(size);
10152 if (buf == NULL) {
10153 xmlErrMemory(ctxt);
10154 return(NULL);
10155 }
10156
10157 buf[len++] = cur;
10158 NEXT;
10159 cur = CUR;
10160 while (((cur >= 'a') && (cur <= 'z')) ||
10161 ((cur >= 'A') && (cur <= 'Z')) ||
10162 ((cur >= '0') && (cur <= '9')) ||
10163 (cur == '.') || (cur == '_') ||
10164 (cur == '-')) {
10165 if (len + 1 >= size) {
10166 xmlChar *tmp;
10167
10168 size *= 2;
10169 tmp = (xmlChar *) xmlRealloc(buf, size);
10170 if (tmp == NULL) {
10171 xmlErrMemory(ctxt);
10172 xmlFree(buf);
10173 return(NULL);
10174 }
10175 buf = tmp;
10176 }
10177 buf[len++] = cur;
10178 if (len > maxLength) {
10179 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10180 xmlFree(buf);
10181 return(NULL);
10182 }
10183 NEXT;
10184 cur = CUR;
10185 }
10186 buf[len] = 0;
10187 } else {
10188 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10189 }
10190 return(buf);
10191 }
10192
10193 /**
10194 * xmlParseEncodingDecl:
10195 * @ctxt: an XML parser context
10196 *
10197 * DEPRECATED: Internal function, don't use.
10198 *
10199 * parse the XML encoding declaration
10200 *
10201 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10202 *
10203 * this setups the conversion filters.
10204 *
10205 * Returns the encoding value or NULL
10206 */
10207
10208 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10209 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10210 xmlChar *encoding = NULL;
10211
10212 SKIP_BLANKS;
10213 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10214 return(NULL);
10215
10216 SKIP(8);
10217 SKIP_BLANKS;
10218 if (RAW != '=') {
10219 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10220 return(NULL);
10221 }
10222 NEXT;
10223 SKIP_BLANKS;
10224 if (RAW == '"') {
10225 NEXT;
10226 encoding = xmlParseEncName(ctxt);
10227 if (RAW != '"') {
10228 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10229 xmlFree((xmlChar *) encoding);
10230 return(NULL);
10231 } else
10232 NEXT;
10233 } else if (RAW == '\''){
10234 NEXT;
10235 encoding = xmlParseEncName(ctxt);
10236 if (RAW != '\'') {
10237 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10238 xmlFree((xmlChar *) encoding);
10239 return(NULL);
10240 } else
10241 NEXT;
10242 } else {
10243 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10244 }
10245
10246 if (encoding == NULL)
10247 return(NULL);
10248
10249 xmlSetDeclaredEncoding(ctxt, encoding);
10250
10251 return(ctxt->encoding);
10252 }
10253
10254 /**
10255 * xmlParseSDDecl:
10256 * @ctxt: an XML parser context
10257 *
10258 * DEPRECATED: Internal function, don't use.
10259 *
10260 * parse the XML standalone declaration
10261 *
10262 * [32] SDDecl ::= S 'standalone' Eq
10263 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10264 *
10265 * [ VC: Standalone Document Declaration ]
10266 * TODO The standalone document declaration must have the value "no"
10267 * if any external markup declarations contain declarations of:
10268 * - attributes with default values, if elements to which these
10269 * attributes apply appear in the document without specifications
10270 * of values for these attributes, or
10271 * - entities (other than amp, lt, gt, apos, quot), if references
10272 * to those entities appear in the document, or
10273 * - attributes with values subject to normalization, where the
10274 * attribute appears in the document with a value which will change
10275 * as a result of normalization, or
10276 * - element types with element content, if white space occurs directly
10277 * within any instance of those types.
10278 *
10279 * Returns:
10280 * 1 if standalone="yes"
10281 * 0 if standalone="no"
10282 * -2 if standalone attribute is missing or invalid
10283 * (A standalone value of -2 means that the XML declaration was found,
10284 * but no value was specified for the standalone attribute).
10285 */
10286
10287 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10288 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10289 int standalone = -2;
10290
10291 SKIP_BLANKS;
10292 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10293 SKIP(10);
10294 SKIP_BLANKS;
10295 if (RAW != '=') {
10296 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10297 return(standalone);
10298 }
10299 NEXT;
10300 SKIP_BLANKS;
10301 if (RAW == '\''){
10302 NEXT;
10303 if ((RAW == 'n') && (NXT(1) == 'o')) {
10304 standalone = 0;
10305 SKIP(2);
10306 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10307 (NXT(2) == 's')) {
10308 standalone = 1;
10309 SKIP(3);
10310 } else {
10311 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10312 }
10313 if (RAW != '\'') {
10314 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10315 } else
10316 NEXT;
10317 } else if (RAW == '"'){
10318 NEXT;
10319 if ((RAW == 'n') && (NXT(1) == 'o')) {
10320 standalone = 0;
10321 SKIP(2);
10322 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10323 (NXT(2) == 's')) {
10324 standalone = 1;
10325 SKIP(3);
10326 } else {
10327 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10328 }
10329 if (RAW != '"') {
10330 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10331 } else
10332 NEXT;
10333 } else {
10334 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10335 }
10336 }
10337 return(standalone);
10338 }
10339
10340 /**
10341 * xmlParseXMLDecl:
10342 * @ctxt: an XML parser context
10343 *
10344 * DEPRECATED: Internal function, don't use.
10345 *
10346 * parse an XML declaration header
10347 *
10348 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10349 */
10350
10351 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10352 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10353 xmlChar *version;
10354
10355 /*
10356 * This value for standalone indicates that the document has an
10357 * XML declaration but it does not have a standalone attribute.
10358 * It will be overwritten later if a standalone attribute is found.
10359 */
10360
10361 ctxt->standalone = -2;
10362
10363 /*
10364 * We know that '<?xml' is here.
10365 */
10366 SKIP(5);
10367
10368 if (!IS_BLANK_CH(RAW)) {
10369 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10370 "Blank needed after '<?xml'\n");
10371 }
10372 SKIP_BLANKS;
10373
10374 /*
10375 * We must have the VersionInfo here.
10376 */
10377 version = xmlParseVersionInfo(ctxt);
10378 if (version == NULL) {
10379 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10380 } else {
10381 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10382 /*
10383 * Changed here for XML-1.0 5th edition
10384 */
10385 if (ctxt->options & XML_PARSE_OLD10) {
10386 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10387 "Unsupported version '%s'\n",
10388 version);
10389 } else {
10390 if ((version[0] == '1') && ((version[1] == '.'))) {
10391 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10392 "Unsupported version '%s'\n",
10393 version, NULL);
10394 } else {
10395 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10396 "Unsupported version '%s'\n",
10397 version);
10398 }
10399 }
10400 }
10401 if (ctxt->version != NULL)
10402 xmlFree((void *) ctxt->version);
10403 ctxt->version = version;
10404 }
10405
10406 /*
10407 * We may have the encoding declaration
10408 */
10409 if (!IS_BLANK_CH(RAW)) {
10410 if ((RAW == '?') && (NXT(1) == '>')) {
10411 SKIP(2);
10412 return;
10413 }
10414 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10415 }
10416 xmlParseEncodingDecl(ctxt);
10417
10418 /*
10419 * We may have the standalone status.
10420 */
10421 if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10422 if ((RAW == '?') && (NXT(1) == '>')) {
10423 SKIP(2);
10424 return;
10425 }
10426 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10427 }
10428
10429 /*
10430 * We can grow the input buffer freely at that point
10431 */
10432 GROW;
10433
10434 SKIP_BLANKS;
10435 ctxt->standalone = xmlParseSDDecl(ctxt);
10436
10437 SKIP_BLANKS;
10438 if ((RAW == '?') && (NXT(1) == '>')) {
10439 SKIP(2);
10440 } else if (RAW == '>') {
10441 /* Deprecated old WD ... */
10442 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10443 NEXT;
10444 } else {
10445 int c;
10446
10447 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10448 while ((PARSER_STOPPED(ctxt) == 0) &&
10449 ((c = CUR) != 0)) {
10450 NEXT;
10451 if (c == '>')
10452 break;
10453 }
10454 }
10455 }
10456
10457 /**
10458 * xmlParseMisc:
10459 * @ctxt: an XML parser context
10460 *
10461 * DEPRECATED: Internal function, don't use.
10462 *
10463 * parse an XML Misc* optional field.
10464 *
10465 * [27] Misc ::= Comment | PI | S
10466 */
10467
10468 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10469 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10470 while (PARSER_STOPPED(ctxt) == 0) {
10471 SKIP_BLANKS;
10472 GROW;
10473 if ((RAW == '<') && (NXT(1) == '?')) {
10474 xmlParsePI(ctxt);
10475 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10476 xmlParseComment(ctxt);
10477 } else {
10478 break;
10479 }
10480 }
10481 }
10482
10483 static void
xmlFinishDocument(xmlParserCtxtPtr ctxt)10484 xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10485 xmlDocPtr doc;
10486
10487 /*
10488 * SAX: end of the document processing.
10489 */
10490 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10491 ctxt->sax->endDocument(ctxt->userData);
10492
10493 doc = ctxt->myDoc;
10494 if (doc != NULL) {
10495 if (ctxt->wellFormed) {
10496 doc->properties |= XML_DOC_WELLFORMED;
10497 if (ctxt->valid)
10498 doc->properties |= XML_DOC_DTDVALID;
10499 if (ctxt->nsWellFormed)
10500 doc->properties |= XML_DOC_NSVALID;
10501 }
10502
10503 if (ctxt->options & XML_PARSE_OLD10)
10504 doc->properties |= XML_DOC_OLD10;
10505
10506 /*
10507 * Remove locally kept entity definitions if the tree was not built
10508 */
10509 if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10510 xmlFreeDoc(doc);
10511 ctxt->myDoc = NULL;
10512 }
10513 }
10514 }
10515
10516 /**
10517 * xmlParseDocument:
10518 * @ctxt: an XML parser context
10519 *
10520 * Parse an XML document and invoke the SAX handlers. This is useful
10521 * if you're only interested in custom SAX callbacks. If you want a
10522 * document tree, use xmlCtxtParseDocument.
10523 *
10524 * Returns 0, -1 in case of error.
10525 */
10526
10527 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10528 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10529 if ((ctxt == NULL) || (ctxt->input == NULL))
10530 return(-1);
10531
10532 GROW;
10533
10534 /*
10535 * SAX: detecting the level.
10536 */
10537 xmlCtxtInitializeLate(ctxt);
10538
10539 /*
10540 * Document locator is unused. Only for backward compatibility.
10541 */
10542 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10543 xmlSAXLocator copy = xmlDefaultSAXLocator;
10544 ctxt->sax->setDocumentLocator(ctxt->userData, ©);
10545 }
10546
10547 xmlDetectEncoding(ctxt);
10548
10549 if (CUR == 0) {
10550 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10551 return(-1);
10552 }
10553
10554 GROW;
10555 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10556
10557 /*
10558 * Note that we will switch encoding on the fly.
10559 */
10560 xmlParseXMLDecl(ctxt);
10561 SKIP_BLANKS;
10562 } else {
10563 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10564 if (ctxt->version == NULL) {
10565 xmlErrMemory(ctxt);
10566 return(-1);
10567 }
10568 }
10569 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10570 ctxt->sax->startDocument(ctxt->userData);
10571 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10572 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10573 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10574 }
10575
10576 /*
10577 * The Misc part of the Prolog
10578 */
10579 xmlParseMisc(ctxt);
10580
10581 /*
10582 * Then possibly doc type declaration(s) and more Misc
10583 * (doctypedecl Misc*)?
10584 */
10585 GROW;
10586 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10587
10588 ctxt->inSubset = 1;
10589 xmlParseDocTypeDecl(ctxt);
10590 if (RAW == '[') {
10591 xmlParseInternalSubset(ctxt);
10592 }
10593
10594 /*
10595 * Create and update the external subset.
10596 */
10597 ctxt->inSubset = 2;
10598 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10599 (!ctxt->disableSAX))
10600 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10601 ctxt->extSubSystem, ctxt->extSubURI);
10602 ctxt->inSubset = 0;
10603
10604 xmlCleanSpecialAttr(ctxt);
10605
10606 xmlParseMisc(ctxt);
10607 }
10608
10609 /*
10610 * Time to start parsing the tree itself
10611 */
10612 GROW;
10613 if (RAW != '<') {
10614 if (ctxt->wellFormed)
10615 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10616 "Start tag expected, '<' not found\n");
10617 } else {
10618 xmlParseElement(ctxt);
10619
10620 /*
10621 * The Misc part at the end
10622 */
10623 xmlParseMisc(ctxt);
10624
10625 if (ctxt->input->cur < ctxt->input->end) {
10626 if (ctxt->wellFormed)
10627 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10628 } else if ((ctxt->input->buf != NULL) &&
10629 (ctxt->input->buf->encoder != NULL) &&
10630 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10631 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10632 "Truncated multi-byte sequence at EOF\n");
10633 }
10634 }
10635
10636 ctxt->instate = XML_PARSER_EOF;
10637 xmlFinishDocument(ctxt);
10638
10639 if (! ctxt->wellFormed) {
10640 ctxt->valid = 0;
10641 return(-1);
10642 }
10643
10644 return(0);
10645 }
10646
10647 /**
10648 * xmlParseExtParsedEnt:
10649 * @ctxt: an XML parser context
10650 *
10651 * parse a general parsed entity
10652 * An external general parsed entity is well-formed if it matches the
10653 * production labeled extParsedEnt.
10654 *
10655 * [78] extParsedEnt ::= TextDecl? content
10656 *
10657 * Returns 0, -1 in case of error. the parser context is augmented
10658 * as a result of the parsing.
10659 */
10660
10661 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10662 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10663 if ((ctxt == NULL) || (ctxt->input == NULL))
10664 return(-1);
10665
10666 xmlCtxtInitializeLate(ctxt);
10667
10668 /*
10669 * Document locator is unused. Only for backward compatibility.
10670 */
10671 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10672 xmlSAXLocator copy = xmlDefaultSAXLocator;
10673 ctxt->sax->setDocumentLocator(ctxt->userData, ©);
10674 }
10675
10676 xmlDetectEncoding(ctxt);
10677
10678 if (CUR == 0) {
10679 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10680 }
10681
10682 /*
10683 * Check for the XMLDecl in the Prolog.
10684 */
10685 GROW;
10686 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10687
10688 /*
10689 * Note that we will switch encoding on the fly.
10690 */
10691 xmlParseXMLDecl(ctxt);
10692 SKIP_BLANKS;
10693 } else {
10694 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10695 }
10696 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10697 ctxt->sax->startDocument(ctxt->userData);
10698
10699 /*
10700 * Doing validity checking on chunk doesn't make sense
10701 */
10702 ctxt->options &= ~XML_PARSE_DTDVALID;
10703 ctxt->validate = 0;
10704 ctxt->depth = 0;
10705
10706 xmlParseContentInternal(ctxt);
10707
10708 if (ctxt->input->cur < ctxt->input->end)
10709 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10710
10711 /*
10712 * SAX: end of the document processing.
10713 */
10714 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10715 ctxt->sax->endDocument(ctxt->userData);
10716
10717 if (! ctxt->wellFormed) return(-1);
10718 return(0);
10719 }
10720
10721 #ifdef LIBXML_PUSH_ENABLED
10722 /************************************************************************
10723 * *
10724 * Progressive parsing interfaces *
10725 * *
10726 ************************************************************************/
10727
10728 /**
10729 * xmlParseLookupChar:
10730 * @ctxt: an XML parser context
10731 * @c: character
10732 *
10733 * Check whether the input buffer contains a character.
10734 */
10735 static int
xmlParseLookupChar(xmlParserCtxtPtr ctxt,int c)10736 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10737 const xmlChar *cur;
10738
10739 if (ctxt->checkIndex == 0) {
10740 cur = ctxt->input->cur + 1;
10741 } else {
10742 cur = ctxt->input->cur + ctxt->checkIndex;
10743 }
10744
10745 if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10746 size_t index = ctxt->input->end - ctxt->input->cur;
10747
10748 if (index > LONG_MAX) {
10749 ctxt->checkIndex = 0;
10750 return(1);
10751 }
10752 ctxt->checkIndex = index;
10753 return(0);
10754 } else {
10755 ctxt->checkIndex = 0;
10756 return(1);
10757 }
10758 }
10759
10760 /**
10761 * xmlParseLookupString:
10762 * @ctxt: an XML parser context
10763 * @startDelta: delta to apply at the start
10764 * @str: string
10765 * @strLen: length of string
10766 *
10767 * Check whether the input buffer contains a string.
10768 */
10769 static const xmlChar *
xmlParseLookupString(xmlParserCtxtPtr ctxt,size_t startDelta,const char * str,size_t strLen)10770 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10771 const char *str, size_t strLen) {
10772 const xmlChar *cur, *term;
10773
10774 if (ctxt->checkIndex == 0) {
10775 cur = ctxt->input->cur + startDelta;
10776 } else {
10777 cur = ctxt->input->cur + ctxt->checkIndex;
10778 }
10779
10780 term = BAD_CAST strstr((const char *) cur, str);
10781 if (term == NULL) {
10782 const xmlChar *end = ctxt->input->end;
10783 size_t index;
10784
10785 /* Rescan (strLen - 1) characters. */
10786 if ((size_t) (end - cur) < strLen)
10787 end = cur;
10788 else
10789 end -= strLen - 1;
10790 index = end - ctxt->input->cur;
10791 if (index > LONG_MAX) {
10792 ctxt->checkIndex = 0;
10793 return(ctxt->input->end - strLen);
10794 }
10795 ctxt->checkIndex = index;
10796 } else {
10797 ctxt->checkIndex = 0;
10798 }
10799
10800 return(term);
10801 }
10802
10803 /**
10804 * xmlParseLookupCharData:
10805 * @ctxt: an XML parser context
10806 *
10807 * Check whether the input buffer contains terminated char data.
10808 */
10809 static int
xmlParseLookupCharData(xmlParserCtxtPtr ctxt)10810 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10811 const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10812 const xmlChar *end = ctxt->input->end;
10813 size_t index;
10814
10815 while (cur < end) {
10816 if ((*cur == '<') || (*cur == '&')) {
10817 ctxt->checkIndex = 0;
10818 return(1);
10819 }
10820 cur++;
10821 }
10822
10823 index = cur - ctxt->input->cur;
10824 if (index > LONG_MAX) {
10825 ctxt->checkIndex = 0;
10826 return(1);
10827 }
10828 ctxt->checkIndex = index;
10829 return(0);
10830 }
10831
10832 /**
10833 * xmlParseLookupGt:
10834 * @ctxt: an XML parser context
10835 *
10836 * Check whether there's enough data in the input buffer to finish parsing
10837 * a start tag. This has to take quotes into account.
10838 */
10839 static int
xmlParseLookupGt(xmlParserCtxtPtr ctxt)10840 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10841 const xmlChar *cur;
10842 const xmlChar *end = ctxt->input->end;
10843 int state = ctxt->endCheckState;
10844 size_t index;
10845
10846 if (ctxt->checkIndex == 0)
10847 cur = ctxt->input->cur + 1;
10848 else
10849 cur = ctxt->input->cur + ctxt->checkIndex;
10850
10851 while (cur < end) {
10852 if (state) {
10853 if (*cur == state)
10854 state = 0;
10855 } else if (*cur == '\'' || *cur == '"') {
10856 state = *cur;
10857 } else if (*cur == '>') {
10858 ctxt->checkIndex = 0;
10859 ctxt->endCheckState = 0;
10860 return(1);
10861 }
10862 cur++;
10863 }
10864
10865 index = cur - ctxt->input->cur;
10866 if (index > LONG_MAX) {
10867 ctxt->checkIndex = 0;
10868 ctxt->endCheckState = 0;
10869 return(1);
10870 }
10871 ctxt->checkIndex = index;
10872 ctxt->endCheckState = state;
10873 return(0);
10874 }
10875
10876 /**
10877 * xmlParseLookupInternalSubset:
10878 * @ctxt: an XML parser context
10879 *
10880 * Check whether there's enough data in the input buffer to finish parsing
10881 * the internal subset.
10882 */
10883 static int
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt)10884 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10885 /*
10886 * Sorry, but progressive parsing of the internal subset is not
10887 * supported. We first check that the full content of the internal
10888 * subset is available and parsing is launched only at that point.
10889 * Internal subset ends with "']' S? '>'" in an unescaped section and
10890 * not in a ']]>' sequence which are conditional sections.
10891 */
10892 const xmlChar *cur, *start;
10893 const xmlChar *end = ctxt->input->end;
10894 int state = ctxt->endCheckState;
10895 size_t index;
10896
10897 if (ctxt->checkIndex == 0) {
10898 cur = ctxt->input->cur + 1;
10899 } else {
10900 cur = ctxt->input->cur + ctxt->checkIndex;
10901 }
10902 start = cur;
10903
10904 while (cur < end) {
10905 if (state == '-') {
10906 if ((*cur == '-') &&
10907 (cur[1] == '-') &&
10908 (cur[2] == '>')) {
10909 state = 0;
10910 cur += 3;
10911 start = cur;
10912 continue;
10913 }
10914 }
10915 else if (state == ']') {
10916 if (*cur == '>') {
10917 ctxt->checkIndex = 0;
10918 ctxt->endCheckState = 0;
10919 return(1);
10920 }
10921 if (IS_BLANK_CH(*cur)) {
10922 state = ' ';
10923 } else if (*cur != ']') {
10924 state = 0;
10925 start = cur;
10926 continue;
10927 }
10928 }
10929 else if (state == ' ') {
10930 if (*cur == '>') {
10931 ctxt->checkIndex = 0;
10932 ctxt->endCheckState = 0;
10933 return(1);
10934 }
10935 if (!IS_BLANK_CH(*cur)) {
10936 state = 0;
10937 start = cur;
10938 continue;
10939 }
10940 }
10941 else if (state != 0) {
10942 if (*cur == state) {
10943 state = 0;
10944 start = cur + 1;
10945 }
10946 }
10947 else if (*cur == '<') {
10948 if ((cur[1] == '!') &&
10949 (cur[2] == '-') &&
10950 (cur[3] == '-')) {
10951 state = '-';
10952 cur += 4;
10953 /* Don't treat <!--> as comment */
10954 start = cur;
10955 continue;
10956 }
10957 }
10958 else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10959 state = *cur;
10960 }
10961
10962 cur++;
10963 }
10964
10965 /*
10966 * Rescan the three last characters to detect "<!--" and "-->"
10967 * split across chunks.
10968 */
10969 if ((state == 0) || (state == '-')) {
10970 if (cur - start < 3)
10971 cur = start;
10972 else
10973 cur -= 3;
10974 }
10975 index = cur - ctxt->input->cur;
10976 if (index > LONG_MAX) {
10977 ctxt->checkIndex = 0;
10978 ctxt->endCheckState = 0;
10979 return(1);
10980 }
10981 ctxt->checkIndex = index;
10982 ctxt->endCheckState = state;
10983 return(0);
10984 }
10985
10986 /**
10987 * xmlCheckCdataPush:
10988 * @cur: pointer to the block of characters
10989 * @len: length of the block in bytes
10990 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
10991 *
10992 * Check that the block of characters is okay as SCdata content [20]
10993 *
10994 * Returns the number of bytes to pass if okay, a negative index where an
10995 * UTF-8 error occurred otherwise
10996 */
10997 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)10998 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
10999 int ix;
11000 unsigned char c;
11001 int codepoint;
11002
11003 if ((utf == NULL) || (len <= 0))
11004 return(0);
11005
11006 for (ix = 0; ix < len;) { /* string is 0-terminated */
11007 c = utf[ix];
11008 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11009 if (c >= 0x20)
11010 ix++;
11011 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11012 ix++;
11013 else
11014 return(-ix);
11015 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11016 if (ix + 2 > len) return(complete ? -ix : ix);
11017 if ((utf[ix+1] & 0xc0 ) != 0x80)
11018 return(-ix);
11019 codepoint = (utf[ix] & 0x1f) << 6;
11020 codepoint |= utf[ix+1] & 0x3f;
11021 if (!xmlIsCharQ(codepoint))
11022 return(-ix);
11023 ix += 2;
11024 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11025 if (ix + 3 > len) return(complete ? -ix : ix);
11026 if (((utf[ix+1] & 0xc0) != 0x80) ||
11027 ((utf[ix+2] & 0xc0) != 0x80))
11028 return(-ix);
11029 codepoint = (utf[ix] & 0xf) << 12;
11030 codepoint |= (utf[ix+1] & 0x3f) << 6;
11031 codepoint |= utf[ix+2] & 0x3f;
11032 if (!xmlIsCharQ(codepoint))
11033 return(-ix);
11034 ix += 3;
11035 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11036 if (ix + 4 > len) return(complete ? -ix : ix);
11037 if (((utf[ix+1] & 0xc0) != 0x80) ||
11038 ((utf[ix+2] & 0xc0) != 0x80) ||
11039 ((utf[ix+3] & 0xc0) != 0x80))
11040 return(-ix);
11041 codepoint = (utf[ix] & 0x7) << 18;
11042 codepoint |= (utf[ix+1] & 0x3f) << 12;
11043 codepoint |= (utf[ix+2] & 0x3f) << 6;
11044 codepoint |= utf[ix+3] & 0x3f;
11045 if (!xmlIsCharQ(codepoint))
11046 return(-ix);
11047 ix += 4;
11048 } else /* unknown encoding */
11049 return(-ix);
11050 }
11051 return(ix);
11052 }
11053
11054 /**
11055 * xmlParseTryOrFinish:
11056 * @ctxt: an XML parser context
11057 * @terminate: last chunk indicator
11058 *
11059 * Try to progress on parsing
11060 *
11061 * Returns zero if no parsing was possible
11062 */
11063 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11064 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11065 int ret = 0;
11066 size_t avail;
11067 xmlChar cur, next;
11068
11069 if (ctxt->input == NULL)
11070 return(0);
11071
11072 if ((ctxt->input != NULL) &&
11073 (ctxt->input->cur - ctxt->input->base > 4096)) {
11074 xmlParserShrink(ctxt);
11075 }
11076
11077 while (ctxt->disableSAX == 0) {
11078 avail = ctxt->input->end - ctxt->input->cur;
11079 if (avail < 1)
11080 goto done;
11081 switch (ctxt->instate) {
11082 case XML_PARSER_EOF:
11083 /*
11084 * Document parsing is done !
11085 */
11086 goto done;
11087 case XML_PARSER_START:
11088 /*
11089 * Very first chars read from the document flow.
11090 */
11091 if ((!terminate) && (avail < 4))
11092 goto done;
11093
11094 /*
11095 * We need more bytes to detect EBCDIC code pages.
11096 * See xmlDetectEBCDIC.
11097 */
11098 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11099 (!terminate) && (avail < 200))
11100 goto done;
11101
11102 xmlDetectEncoding(ctxt);
11103 ctxt->instate = XML_PARSER_XML_DECL;
11104 break;
11105
11106 case XML_PARSER_XML_DECL:
11107 if ((!terminate) && (avail < 2))
11108 goto done;
11109 cur = ctxt->input->cur[0];
11110 next = ctxt->input->cur[1];
11111 if ((cur == '<') && (next == '?')) {
11112 /* PI or XML decl */
11113 if ((!terminate) &&
11114 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11115 goto done;
11116 if ((ctxt->input->cur[2] == 'x') &&
11117 (ctxt->input->cur[3] == 'm') &&
11118 (ctxt->input->cur[4] == 'l') &&
11119 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11120 ret += 5;
11121 xmlParseXMLDecl(ctxt);
11122 } else {
11123 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11124 if (ctxt->version == NULL) {
11125 xmlErrMemory(ctxt);
11126 break;
11127 }
11128 }
11129 } else {
11130 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11131 if (ctxt->version == NULL) {
11132 xmlErrMemory(ctxt);
11133 break;
11134 }
11135 }
11136 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11137 xmlSAXLocator copy = xmlDefaultSAXLocator;
11138 ctxt->sax->setDocumentLocator(ctxt->userData, ©);
11139 }
11140 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11141 (!ctxt->disableSAX))
11142 ctxt->sax->startDocument(ctxt->userData);
11143 ctxt->instate = XML_PARSER_MISC;
11144 break;
11145 case XML_PARSER_START_TAG: {
11146 const xmlChar *name;
11147 const xmlChar *prefix = NULL;
11148 const xmlChar *URI = NULL;
11149 int line = ctxt->input->line;
11150 int nbNs = 0;
11151
11152 if ((!terminate) && (avail < 2))
11153 goto done;
11154 cur = ctxt->input->cur[0];
11155 if (cur != '<') {
11156 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11157 "Start tag expected, '<' not found");
11158 ctxt->instate = XML_PARSER_EOF;
11159 xmlFinishDocument(ctxt);
11160 goto done;
11161 }
11162 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11163 goto done;
11164 if (ctxt->spaceNr == 0)
11165 spacePush(ctxt, -1);
11166 else if (*ctxt->space == -2)
11167 spacePush(ctxt, -1);
11168 else
11169 spacePush(ctxt, *ctxt->space);
11170 #ifdef LIBXML_SAX1_ENABLED
11171 if (ctxt->sax2)
11172 #endif /* LIBXML_SAX1_ENABLED */
11173 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11174 #ifdef LIBXML_SAX1_ENABLED
11175 else
11176 name = xmlParseStartTag(ctxt);
11177 #endif /* LIBXML_SAX1_ENABLED */
11178 if (name == NULL) {
11179 spacePop(ctxt);
11180 ctxt->instate = XML_PARSER_EOF;
11181 xmlFinishDocument(ctxt);
11182 goto done;
11183 }
11184 #ifdef LIBXML_VALID_ENABLED
11185 /*
11186 * [ VC: Root Element Type ]
11187 * The Name in the document type declaration must match
11188 * the element type of the root element.
11189 */
11190 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11191 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11192 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11193 #endif /* LIBXML_VALID_ENABLED */
11194
11195 /*
11196 * Check for an Empty Element.
11197 */
11198 if ((RAW == '/') && (NXT(1) == '>')) {
11199 SKIP(2);
11200
11201 if (ctxt->sax2) {
11202 if ((ctxt->sax != NULL) &&
11203 (ctxt->sax->endElementNs != NULL) &&
11204 (!ctxt->disableSAX))
11205 ctxt->sax->endElementNs(ctxt->userData, name,
11206 prefix, URI);
11207 if (nbNs > 0)
11208 xmlParserNsPop(ctxt, nbNs);
11209 #ifdef LIBXML_SAX1_ENABLED
11210 } else {
11211 if ((ctxt->sax != NULL) &&
11212 (ctxt->sax->endElement != NULL) &&
11213 (!ctxt->disableSAX))
11214 ctxt->sax->endElement(ctxt->userData, name);
11215 #endif /* LIBXML_SAX1_ENABLED */
11216 }
11217 spacePop(ctxt);
11218 } else if (RAW == '>') {
11219 NEXT;
11220 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11221 } else {
11222 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11223 "Couldn't find end of Start Tag %s\n",
11224 name);
11225 nodePop(ctxt);
11226 spacePop(ctxt);
11227 if (nbNs > 0)
11228 xmlParserNsPop(ctxt, nbNs);
11229 }
11230
11231 if (ctxt->nameNr == 0)
11232 ctxt->instate = XML_PARSER_EPILOG;
11233 else
11234 ctxt->instate = XML_PARSER_CONTENT;
11235 break;
11236 }
11237 case XML_PARSER_CONTENT: {
11238 cur = ctxt->input->cur[0];
11239
11240 if (cur == '<') {
11241 if ((!terminate) && (avail < 2))
11242 goto done;
11243 next = ctxt->input->cur[1];
11244
11245 if (next == '/') {
11246 ctxt->instate = XML_PARSER_END_TAG;
11247 break;
11248 } else if (next == '?') {
11249 if ((!terminate) &&
11250 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11251 goto done;
11252 xmlParsePI(ctxt);
11253 ctxt->instate = XML_PARSER_CONTENT;
11254 break;
11255 } else if (next == '!') {
11256 if ((!terminate) && (avail < 3))
11257 goto done;
11258 next = ctxt->input->cur[2];
11259
11260 if (next == '-') {
11261 if ((!terminate) && (avail < 4))
11262 goto done;
11263 if (ctxt->input->cur[3] == '-') {
11264 if ((!terminate) &&
11265 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11266 goto done;
11267 xmlParseComment(ctxt);
11268 ctxt->instate = XML_PARSER_CONTENT;
11269 break;
11270 }
11271 } else if (next == '[') {
11272 if ((!terminate) && (avail < 9))
11273 goto done;
11274 if ((ctxt->input->cur[2] == '[') &&
11275 (ctxt->input->cur[3] == 'C') &&
11276 (ctxt->input->cur[4] == 'D') &&
11277 (ctxt->input->cur[5] == 'A') &&
11278 (ctxt->input->cur[6] == 'T') &&
11279 (ctxt->input->cur[7] == 'A') &&
11280 (ctxt->input->cur[8] == '[')) {
11281 SKIP(9);
11282 ctxt->instate = XML_PARSER_CDATA_SECTION;
11283 break;
11284 }
11285 }
11286 }
11287 } else if (cur == '&') {
11288 if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11289 goto done;
11290 xmlParseReference(ctxt);
11291 break;
11292 } else {
11293 /* TODO Avoid the extra copy, handle directly !!! */
11294 /*
11295 * Goal of the following test is:
11296 * - minimize calls to the SAX 'character' callback
11297 * when they are mergeable
11298 * - handle an problem for isBlank when we only parse
11299 * a sequence of blank chars and the next one is
11300 * not available to check against '<' presence.
11301 * - tries to homogenize the differences in SAX
11302 * callbacks between the push and pull versions
11303 * of the parser.
11304 */
11305 if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11306 if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11307 goto done;
11308 }
11309 ctxt->checkIndex = 0;
11310 xmlParseCharDataInternal(ctxt, !terminate);
11311 break;
11312 }
11313
11314 ctxt->instate = XML_PARSER_START_TAG;
11315 break;
11316 }
11317 case XML_PARSER_END_TAG:
11318 if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11319 goto done;
11320 if (ctxt->sax2) {
11321 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11322 nameNsPop(ctxt);
11323 }
11324 #ifdef LIBXML_SAX1_ENABLED
11325 else
11326 xmlParseEndTag1(ctxt, 0);
11327 #endif /* LIBXML_SAX1_ENABLED */
11328 if (ctxt->nameNr == 0) {
11329 ctxt->instate = XML_PARSER_EPILOG;
11330 } else {
11331 ctxt->instate = XML_PARSER_CONTENT;
11332 }
11333 break;
11334 case XML_PARSER_CDATA_SECTION: {
11335 /*
11336 * The Push mode need to have the SAX callback for
11337 * cdataBlock merge back contiguous callbacks.
11338 */
11339 const xmlChar *term;
11340
11341 if (terminate) {
11342 /*
11343 * Don't call xmlParseLookupString. If 'terminate'
11344 * is set, checkIndex is invalid.
11345 */
11346 term = BAD_CAST strstr((const char *) ctxt->input->cur,
11347 "]]>");
11348 } else {
11349 term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11350 }
11351
11352 if (term == NULL) {
11353 int tmp, size;
11354
11355 if (terminate) {
11356 /* Unfinished CDATA section */
11357 size = ctxt->input->end - ctxt->input->cur;
11358 } else {
11359 if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11360 goto done;
11361 ctxt->checkIndex = 0;
11362 /* XXX: Why don't we pass the full buffer? */
11363 size = XML_PARSER_BIG_BUFFER_SIZE;
11364 }
11365 tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11366 if (tmp <= 0) {
11367 tmp = -tmp;
11368 ctxt->input->cur += tmp;
11369 goto encoding_error;
11370 }
11371 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11372 if (ctxt->sax->cdataBlock != NULL)
11373 ctxt->sax->cdataBlock(ctxt->userData,
11374 ctxt->input->cur, tmp);
11375 else if (ctxt->sax->characters != NULL)
11376 ctxt->sax->characters(ctxt->userData,
11377 ctxt->input->cur, tmp);
11378 }
11379 SKIPL(tmp);
11380 } else {
11381 int base = term - CUR_PTR;
11382 int tmp;
11383
11384 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11385 if ((tmp < 0) || (tmp != base)) {
11386 tmp = -tmp;
11387 ctxt->input->cur += tmp;
11388 goto encoding_error;
11389 }
11390 if ((ctxt->sax != NULL) && (base == 0) &&
11391 (ctxt->sax->cdataBlock != NULL) &&
11392 (!ctxt->disableSAX)) {
11393 /*
11394 * Special case to provide identical behaviour
11395 * between pull and push parsers on enpty CDATA
11396 * sections
11397 */
11398 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11399 (!strncmp((const char *)&ctxt->input->cur[-9],
11400 "<![CDATA[", 9)))
11401 ctxt->sax->cdataBlock(ctxt->userData,
11402 BAD_CAST "", 0);
11403 } else if ((ctxt->sax != NULL) && (base > 0) &&
11404 (!ctxt->disableSAX)) {
11405 if (ctxt->sax->cdataBlock != NULL)
11406 ctxt->sax->cdataBlock(ctxt->userData,
11407 ctxt->input->cur, base);
11408 else if (ctxt->sax->characters != NULL)
11409 ctxt->sax->characters(ctxt->userData,
11410 ctxt->input->cur, base);
11411 }
11412 SKIPL(base + 3);
11413 ctxt->instate = XML_PARSER_CONTENT;
11414 }
11415 break;
11416 }
11417 case XML_PARSER_MISC:
11418 case XML_PARSER_PROLOG:
11419 case XML_PARSER_EPILOG:
11420 SKIP_BLANKS;
11421 avail = ctxt->input->end - ctxt->input->cur;
11422 if (avail < 1)
11423 goto done;
11424 if (ctxt->input->cur[0] == '<') {
11425 if ((!terminate) && (avail < 2))
11426 goto done;
11427 next = ctxt->input->cur[1];
11428 if (next == '?') {
11429 if ((!terminate) &&
11430 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11431 goto done;
11432 xmlParsePI(ctxt);
11433 break;
11434 } else if (next == '!') {
11435 if ((!terminate) && (avail < 3))
11436 goto done;
11437
11438 if (ctxt->input->cur[2] == '-') {
11439 if ((!terminate) && (avail < 4))
11440 goto done;
11441 if (ctxt->input->cur[3] == '-') {
11442 if ((!terminate) &&
11443 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11444 goto done;
11445 xmlParseComment(ctxt);
11446 break;
11447 }
11448 } else if (ctxt->instate == XML_PARSER_MISC) {
11449 if ((!terminate) && (avail < 9))
11450 goto done;
11451 if ((ctxt->input->cur[2] == 'D') &&
11452 (ctxt->input->cur[3] == 'O') &&
11453 (ctxt->input->cur[4] == 'C') &&
11454 (ctxt->input->cur[5] == 'T') &&
11455 (ctxt->input->cur[6] == 'Y') &&
11456 (ctxt->input->cur[7] == 'P') &&
11457 (ctxt->input->cur[8] == 'E')) {
11458 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11459 goto done;
11460 ctxt->inSubset = 1;
11461 xmlParseDocTypeDecl(ctxt);
11462 if (RAW == '[') {
11463 ctxt->instate = XML_PARSER_DTD;
11464 } else {
11465 /*
11466 * Create and update the external subset.
11467 */
11468 ctxt->inSubset = 2;
11469 if ((ctxt->sax != NULL) &&
11470 (!ctxt->disableSAX) &&
11471 (ctxt->sax->externalSubset != NULL))
11472 ctxt->sax->externalSubset(
11473 ctxt->userData,
11474 ctxt->intSubName,
11475 ctxt->extSubSystem,
11476 ctxt->extSubURI);
11477 ctxt->inSubset = 0;
11478 xmlCleanSpecialAttr(ctxt);
11479 ctxt->instate = XML_PARSER_PROLOG;
11480 }
11481 break;
11482 }
11483 }
11484 }
11485 }
11486
11487 if (ctxt->instate == XML_PARSER_EPILOG) {
11488 if (ctxt->errNo == XML_ERR_OK)
11489 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11490 ctxt->instate = XML_PARSER_EOF;
11491 xmlFinishDocument(ctxt);
11492 } else {
11493 ctxt->instate = XML_PARSER_START_TAG;
11494 }
11495 break;
11496 case XML_PARSER_DTD: {
11497 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11498 goto done;
11499 xmlParseInternalSubset(ctxt);
11500 ctxt->inSubset = 2;
11501 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11502 (ctxt->sax->externalSubset != NULL))
11503 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11504 ctxt->extSubSystem, ctxt->extSubURI);
11505 ctxt->inSubset = 0;
11506 xmlCleanSpecialAttr(ctxt);
11507 ctxt->instate = XML_PARSER_PROLOG;
11508 break;
11509 }
11510 default:
11511 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11512 "PP: internal error\n");
11513 ctxt->instate = XML_PARSER_EOF;
11514 break;
11515 }
11516 }
11517 done:
11518 return(ret);
11519 encoding_error:
11520 /* Only report the first error */
11521 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
11522 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
11523 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
11524 }
11525 return(0);
11526 }
11527
11528 /**
11529 * xmlParseChunk:
11530 * @ctxt: an XML parser context
11531 * @chunk: chunk of memory
11532 * @size: size of chunk in bytes
11533 * @terminate: last chunk indicator
11534 *
11535 * Parse a chunk of memory in push parser mode.
11536 *
11537 * Assumes that the parser context was initialized with
11538 * xmlCreatePushParserCtxt.
11539 *
11540 * The last chunk, which will often be empty, must be marked with
11541 * the @terminate flag. With the default SAX callbacks, the resulting
11542 * document will be available in ctxt->myDoc. This pointer will not
11543 * be freed by the library.
11544 *
11545 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11546 * The push parser doesn't support recovery mode.
11547 *
11548 * Returns an xmlParserErrors code (0 on success).
11549 */
11550 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11551 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11552 int terminate) {
11553 size_t curBase;
11554 size_t maxLength;
11555 int end_in_lf = 0;
11556
11557 if ((ctxt == NULL) || (size < 0))
11558 return(XML_ERR_ARGUMENT);
11559 if (ctxt->disableSAX != 0)
11560 return(ctxt->errNo);
11561 if (ctxt->input == NULL)
11562 return(XML_ERR_INTERNAL_ERROR);
11563
11564 ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11565 if (ctxt->instate == XML_PARSER_START)
11566 xmlCtxtInitializeLate(ctxt);
11567 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11568 (chunk[size - 1] == '\r')) {
11569 end_in_lf = 1;
11570 size--;
11571 }
11572
11573 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11574 (ctxt->input->buf != NULL)) {
11575 size_t pos = ctxt->input->cur - ctxt->input->base;
11576 int res;
11577
11578 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11579 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11580 if (res < 0) {
11581 xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11582 xmlHaltParser(ctxt);
11583 return(ctxt->errNo);
11584 }
11585 }
11586
11587 xmlParseTryOrFinish(ctxt, terminate);
11588
11589 curBase = ctxt->input->cur - ctxt->input->base;
11590 maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11591 XML_MAX_HUGE_LENGTH :
11592 XML_MAX_LOOKUP_LIMIT;
11593 if (curBase > maxLength) {
11594 xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11595 "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11596 xmlHaltParser(ctxt);
11597 }
11598
11599 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11600 return(ctxt->errNo);
11601
11602 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11603 (ctxt->input->buf != NULL)) {
11604 size_t pos = ctxt->input->cur - ctxt->input->base;
11605 int res;
11606
11607 res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11608 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11609 if (res < 0) {
11610 xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11611 xmlHaltParser(ctxt);
11612 return(ctxt->errNo);
11613 }
11614 }
11615 if (terminate) {
11616 /*
11617 * Check for termination
11618 */
11619 if ((ctxt->instate != XML_PARSER_EOF) &&
11620 (ctxt->instate != XML_PARSER_EPILOG)) {
11621 if (ctxt->nameNr > 0) {
11622 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11623 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11624 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11625 "Premature end of data in tag %s line %d\n",
11626 name, line, NULL);
11627 } else if (ctxt->instate == XML_PARSER_START) {
11628 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11629 } else {
11630 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11631 "Start tag expected, '<' not found\n");
11632 }
11633 } else if ((ctxt->input->buf != NULL) &&
11634 (ctxt->input->buf->encoder != NULL) &&
11635 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11636 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11637 "Truncated multi-byte sequence at EOF\n");
11638 }
11639 if (ctxt->instate != XML_PARSER_EOF) {
11640 ctxt->instate = XML_PARSER_EOF;
11641 xmlFinishDocument(ctxt);
11642 }
11643 }
11644 if (ctxt->wellFormed == 0)
11645 return((xmlParserErrors) ctxt->errNo);
11646 else
11647 return(0);
11648 }
11649
11650 /************************************************************************
11651 * *
11652 * I/O front end functions to the parser *
11653 * *
11654 ************************************************************************/
11655
11656 /**
11657 * xmlCreatePushParserCtxt:
11658 * @sax: a SAX handler (optional)
11659 * @user_data: user data for SAX callbacks (optional)
11660 * @chunk: initial chunk (optional, deprecated)
11661 * @size: size of initial chunk in bytes
11662 * @filename: file name or URI (optional)
11663 *
11664 * Create a parser context for using the XML parser in push mode.
11665 * See xmlParseChunk.
11666 *
11667 * Passing an initial chunk is useless and deprecated.
11668 *
11669 * @filename is used as base URI to fetch external entities and for
11670 * error reports.
11671 *
11672 * Returns the new parser context or NULL in case of error.
11673 */
11674
11675 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11676 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11677 const char *chunk, int size, const char *filename) {
11678 xmlParserCtxtPtr ctxt;
11679 xmlParserInputPtr input;
11680
11681 ctxt = xmlNewSAXParserCtxt(sax, user_data);
11682 if (ctxt == NULL)
11683 return(NULL);
11684
11685 ctxt->options &= ~XML_PARSE_NODICT;
11686 ctxt->dictNames = 1;
11687
11688 input = xmlNewInputPush(ctxt, filename, chunk, size, NULL);
11689 if (input == NULL) {
11690 xmlFreeParserCtxt(ctxt);
11691 return(NULL);
11692 }
11693 inputPush(ctxt, input);
11694
11695 return(ctxt);
11696 }
11697 #endif /* LIBXML_PUSH_ENABLED */
11698
11699 /**
11700 * xmlStopParser:
11701 * @ctxt: an XML parser context
11702 *
11703 * Blocks further parser processing
11704 */
11705 void
xmlStopParser(xmlParserCtxtPtr ctxt)11706 xmlStopParser(xmlParserCtxtPtr ctxt) {
11707 if (ctxt == NULL)
11708 return;
11709 xmlHaltParser(ctxt);
11710 if (ctxt->errNo != XML_ERR_NO_MEMORY)
11711 ctxt->errNo = XML_ERR_USER_STOP;
11712 }
11713
11714 /**
11715 * xmlCreateIOParserCtxt:
11716 * @sax: a SAX handler (optional)
11717 * @user_data: user data for SAX callbacks (optional)
11718 * @ioread: an I/O read function
11719 * @ioclose: an I/O close function (optional)
11720 * @ioctx: an I/O handler
11721 * @enc: the charset encoding if known (deprecated)
11722 *
11723 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadIO.
11724 *
11725 * Create a parser context for using the XML parser with an existing
11726 * I/O stream
11727 *
11728 * Returns the new parser context or NULL
11729 */
11730 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11731 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11732 xmlInputReadCallback ioread,
11733 xmlInputCloseCallback ioclose,
11734 void *ioctx, xmlCharEncoding enc) {
11735 xmlParserCtxtPtr ctxt;
11736 xmlParserInputPtr input;
11737 const char *encoding;
11738
11739 ctxt = xmlNewSAXParserCtxt(sax, user_data);
11740 if (ctxt == NULL)
11741 return(NULL);
11742
11743 encoding = xmlGetCharEncodingName(enc);
11744 input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0);
11745 if (input == NULL) {
11746 xmlFreeParserCtxt(ctxt);
11747 return (NULL);
11748 }
11749 inputPush(ctxt, input);
11750
11751 return(ctxt);
11752 }
11753
11754 #ifdef LIBXML_VALID_ENABLED
11755 /************************************************************************
11756 * *
11757 * Front ends when parsing a DTD *
11758 * *
11759 ************************************************************************/
11760
11761 /**
11762 * xmlIOParseDTD:
11763 * @sax: the SAX handler block or NULL
11764 * @input: an Input Buffer
11765 * @enc: the charset encoding if known
11766 *
11767 * Load and parse a DTD
11768 *
11769 * Returns the resulting xmlDtdPtr or NULL in case of error.
11770 * @input will be freed by the function in any case.
11771 */
11772
11773 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)11774 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11775 xmlCharEncoding enc) {
11776 xmlDtdPtr ret = NULL;
11777 xmlParserCtxtPtr ctxt;
11778 xmlParserInputPtr pinput = NULL;
11779
11780 if (input == NULL)
11781 return(NULL);
11782
11783 ctxt = xmlNewSAXParserCtxt(sax, NULL);
11784 if (ctxt == NULL) {
11785 xmlFreeParserInputBuffer(input);
11786 return(NULL);
11787 }
11788
11789 /*
11790 * generate a parser input from the I/O handler
11791 */
11792
11793 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11794 if (pinput == NULL) {
11795 xmlFreeParserInputBuffer(input);
11796 xmlFreeParserCtxt(ctxt);
11797 return(NULL);
11798 }
11799
11800 /*
11801 * plug some encoding conversion routines here.
11802 */
11803 if (xmlPushInput(ctxt, pinput) < 0) {
11804 xmlFreeParserCtxt(ctxt);
11805 return(NULL);
11806 }
11807 if (enc != XML_CHAR_ENCODING_NONE) {
11808 xmlSwitchEncoding(ctxt, enc);
11809 }
11810
11811 /*
11812 * let's parse that entity knowing it's an external subset.
11813 */
11814 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11815 if (ctxt->myDoc == NULL) {
11816 xmlErrMemory(ctxt);
11817 return(NULL);
11818 }
11819 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11820 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11821 BAD_CAST "none", BAD_CAST "none");
11822
11823 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11824
11825 if (ctxt->myDoc != NULL) {
11826 if (ctxt->wellFormed) {
11827 ret = ctxt->myDoc->extSubset;
11828 ctxt->myDoc->extSubset = NULL;
11829 if (ret != NULL) {
11830 xmlNodePtr tmp;
11831
11832 ret->doc = NULL;
11833 tmp = ret->children;
11834 while (tmp != NULL) {
11835 tmp->doc = NULL;
11836 tmp = tmp->next;
11837 }
11838 }
11839 } else {
11840 ret = NULL;
11841 }
11842 xmlFreeDoc(ctxt->myDoc);
11843 ctxt->myDoc = NULL;
11844 }
11845 xmlFreeParserCtxt(ctxt);
11846
11847 return(ret);
11848 }
11849
11850 /**
11851 * xmlSAXParseDTD:
11852 * @sax: the SAX handler block
11853 * @ExternalID: a NAME* containing the External ID of the DTD
11854 * @SystemID: a NAME* containing the URL to the DTD
11855 *
11856 * DEPRECATED: Don't use.
11857 *
11858 * Load and parse an external subset.
11859 *
11860 * Returns the resulting xmlDtdPtr or NULL in case of error.
11861 */
11862
11863 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)11864 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11865 const xmlChar *SystemID) {
11866 xmlDtdPtr ret = NULL;
11867 xmlParserCtxtPtr ctxt;
11868 xmlParserInputPtr input = NULL;
11869 xmlChar* systemIdCanonic;
11870
11871 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11872
11873 ctxt = xmlNewSAXParserCtxt(sax, NULL);
11874 if (ctxt == NULL) {
11875 return(NULL);
11876 }
11877
11878 /*
11879 * Canonicalise the system ID
11880 */
11881 systemIdCanonic = xmlCanonicPath(SystemID);
11882 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11883 xmlFreeParserCtxt(ctxt);
11884 return(NULL);
11885 }
11886
11887 /*
11888 * Ask the Entity resolver to load the damn thing
11889 */
11890
11891 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11892 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11893 systemIdCanonic);
11894 if (input == NULL) {
11895 xmlFreeParserCtxt(ctxt);
11896 if (systemIdCanonic != NULL)
11897 xmlFree(systemIdCanonic);
11898 return(NULL);
11899 }
11900
11901 /*
11902 * plug some encoding conversion routines here.
11903 */
11904 if (xmlPushInput(ctxt, input) < 0) {
11905 xmlFreeParserCtxt(ctxt);
11906 if (systemIdCanonic != NULL)
11907 xmlFree(systemIdCanonic);
11908 return(NULL);
11909 }
11910
11911 xmlDetectEncoding(ctxt);
11912
11913 if (input->filename == NULL)
11914 input->filename = (char *) systemIdCanonic;
11915 else
11916 xmlFree(systemIdCanonic);
11917
11918 /*
11919 * let's parse that entity knowing it's an external subset.
11920 */
11921 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11922 if (ctxt->myDoc == NULL) {
11923 xmlErrMemory(ctxt);
11924 xmlFreeParserCtxt(ctxt);
11925 return(NULL);
11926 }
11927 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11928 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11929 ExternalID, SystemID);
11930 if (ctxt->myDoc->extSubset == NULL) {
11931 xmlFreeDoc(ctxt->myDoc);
11932 xmlFreeParserCtxt(ctxt);
11933 return(NULL);
11934 }
11935 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11936
11937 if (ctxt->myDoc != NULL) {
11938 if (ctxt->wellFormed) {
11939 ret = ctxt->myDoc->extSubset;
11940 ctxt->myDoc->extSubset = NULL;
11941 if (ret != NULL) {
11942 xmlNodePtr tmp;
11943
11944 ret->doc = NULL;
11945 tmp = ret->children;
11946 while (tmp != NULL) {
11947 tmp->doc = NULL;
11948 tmp = tmp->next;
11949 }
11950 }
11951 } else {
11952 ret = NULL;
11953 }
11954 xmlFreeDoc(ctxt->myDoc);
11955 ctxt->myDoc = NULL;
11956 }
11957 xmlFreeParserCtxt(ctxt);
11958
11959 return(ret);
11960 }
11961
11962
11963 /**
11964 * xmlParseDTD:
11965 * @ExternalID: a NAME* containing the External ID of the DTD
11966 * @SystemID: a NAME* containing the URL to the DTD
11967 *
11968 * Load and parse an external subset.
11969 *
11970 * Returns the resulting xmlDtdPtr or NULL in case of error.
11971 */
11972
11973 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)11974 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11975 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11976 }
11977 #endif /* LIBXML_VALID_ENABLED */
11978
11979 /************************************************************************
11980 * *
11981 * Front ends when parsing an Entity *
11982 * *
11983 ************************************************************************/
11984
11985 static xmlNodePtr
xmlCtxtParseContent(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,int hasTextDecl,int buildTree)11986 xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11987 int hasTextDecl, int buildTree) {
11988 xmlNodePtr root = NULL;
11989 xmlNodePtr list = NULL;
11990 xmlChar *rootName = BAD_CAST "#root";
11991 int result;
11992
11993 if (buildTree) {
11994 root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11995 if (root == NULL) {
11996 xmlErrMemory(ctxt);
11997 goto error;
11998 }
11999 }
12000
12001 if (xmlPushInput(ctxt, input) < 0)
12002 goto error;
12003
12004 nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
12005 spacePush(ctxt, -1);
12006
12007 if (buildTree)
12008 nodePush(ctxt, root);
12009
12010 if (hasTextDecl) {
12011 xmlDetectEncoding(ctxt);
12012
12013 /*
12014 * Parse a possible text declaration first
12015 */
12016 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
12017 (IS_BLANK_CH(NXT(5)))) {
12018 xmlParseTextDecl(ctxt);
12019 /*
12020 * An XML-1.0 document can't reference an entity not XML-1.0
12021 */
12022 if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
12023 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12024 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12025 "Version mismatch between document and "
12026 "entity\n");
12027 }
12028 }
12029 }
12030
12031 xmlParseContentInternal(ctxt);
12032
12033 if (ctxt->input->cur < ctxt->input->end)
12034 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12035
12036 if ((ctxt->wellFormed) ||
12037 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12038 if (root != NULL) {
12039 xmlNodePtr cur;
12040
12041 /*
12042 * Return the newly created nodeset after unlinking it from
12043 * its pseudo parent.
12044 */
12045 cur = root->children;
12046 list = cur;
12047 while (cur != NULL) {
12048 cur->parent = NULL;
12049 cur = cur->next;
12050 }
12051 root->children = NULL;
12052 root->last = NULL;
12053 }
12054 }
12055
12056 /*
12057 * Read the rest of the stream in case of errors. We want
12058 * to account for the whole entity size.
12059 */
12060 do {
12061 ctxt->input->cur = ctxt->input->end;
12062 xmlParserShrink(ctxt);
12063 result = xmlParserGrow(ctxt);
12064 } while (result > 0);
12065
12066 if (buildTree)
12067 nodePop(ctxt);
12068
12069 namePop(ctxt);
12070 spacePop(ctxt);
12071
12072 /* xmlPopInput would free the stream */
12073 inputPop(ctxt);
12074
12075 error:
12076 xmlFreeNode(root);
12077
12078 return(list);
12079 }
12080
12081 static void
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)12082 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12083 xmlParserInputPtr input;
12084 xmlNodePtr list;
12085 unsigned long consumed;
12086 int isExternal;
12087 int buildTree;
12088 int oldMinNsIndex;
12089 int oldNodelen, oldNodemem;
12090
12091 isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12092 buildTree = (ctxt->node != NULL);
12093
12094 /*
12095 * Recursion check
12096 */
12097 if (ent->flags & XML_ENT_EXPANDING) {
12098 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12099 xmlHaltParser(ctxt);
12100 goto error;
12101 }
12102
12103 /*
12104 * Load entity
12105 */
12106 input = xmlNewEntityInputStream(ctxt, ent);
12107 if (input == NULL)
12108 goto error;
12109
12110 /*
12111 * When building a tree, we need to limit the scope of namespace
12112 * declarations, so that entities don't reference xmlNs structs
12113 * from the parent of a reference.
12114 */
12115 oldMinNsIndex = ctxt->nsdb->minNsIndex;
12116 if (buildTree)
12117 ctxt->nsdb->minNsIndex = ctxt->nsNr;
12118
12119 oldNodelen = ctxt->nodelen;
12120 oldNodemem = ctxt->nodemem;
12121 ctxt->nodelen = 0;
12122 ctxt->nodemem = 0;
12123
12124 /*
12125 * Parse content
12126 *
12127 * This initiates a recursive call chain:
12128 *
12129 * - xmlCtxtParseContent
12130 * - xmlParseContentInternal
12131 * - xmlParseReference
12132 * - xmlCtxtParseEntity
12133 *
12134 * The nesting depth is limited by the maximum number of inputs,
12135 * see xmlPushInput.
12136 *
12137 * It's possible to make this non-recursive (minNsIndex must be
12138 * stored in the input struct) at the expense of code readability.
12139 */
12140
12141 ent->flags |= XML_ENT_EXPANDING;
12142
12143 list = xmlCtxtParseContent(ctxt, input, isExternal, buildTree);
12144
12145 ent->flags &= ~XML_ENT_EXPANDING;
12146
12147 ctxt->nsdb->minNsIndex = oldMinNsIndex;
12148 ctxt->nodelen = oldNodelen;
12149 ctxt->nodemem = oldNodemem;
12150
12151 /*
12152 * Entity size accounting
12153 */
12154 consumed = input->consumed;
12155 xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12156
12157 if ((ent->flags & XML_ENT_CHECKED) == 0)
12158 xmlSaturatedAdd(&ent->expandedSize, consumed);
12159
12160 if ((ent->flags & XML_ENT_PARSED) == 0) {
12161 if (isExternal)
12162 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12163
12164 ent->children = list;
12165
12166 while (list != NULL) {
12167 list->parent = (xmlNodePtr) ent;
12168 if (list->next == NULL)
12169 ent->last = list;
12170 list = list->next;
12171 }
12172 } else {
12173 xmlFreeNodeList(list);
12174 }
12175
12176 xmlFreeInputStream(input);
12177
12178 error:
12179 ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12180 }
12181
12182 /**
12183 * xmlParseCtxtExternalEntity:
12184 * @ctx: the existing parsing context
12185 * @URL: the URL for the entity to load
12186 * @ID: the System ID for the entity to load
12187 * @lst: the return value for the set of parsed nodes
12188 *
12189 * Parse an external general entity within an existing parsing context
12190 * An external general parsed entity is well-formed if it matches the
12191 * production labeled extParsedEnt.
12192 *
12193 * [78] extParsedEnt ::= TextDecl? content
12194 *
12195 * Returns 0 if the entity is well formed, -1 in case of args problem and
12196 * the parser error code otherwise
12197 */
12198
12199 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * listOut)12200 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12201 const xmlChar *ID, xmlNodePtr *listOut) {
12202 xmlParserInputPtr input;
12203 xmlNodePtr list;
12204
12205 if (listOut != NULL)
12206 *listOut = NULL;
12207
12208 if (ctxt == NULL)
12209 return(XML_ERR_ARGUMENT);
12210
12211 input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12212 if (input == NULL)
12213 return(ctxt->errNo);
12214
12215 xmlCtxtInitializeLate(ctxt);
12216
12217 list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
12218 if (*listOut != NULL)
12219 *listOut = list;
12220 else
12221 xmlFreeNodeList(list);
12222
12223 xmlFreeInputStream(input);
12224 return(ctxt->errNo);
12225 }
12226
12227 #ifdef LIBXML_SAX1_ENABLED
12228 /**
12229 * xmlParseExternalEntity:
12230 * @doc: the document the chunk pertains to
12231 * @sax: the SAX handler block (possibly NULL)
12232 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12233 * @depth: Used for loop detection, use 0
12234 * @URL: the URL for the entity to load
12235 * @ID: the System ID for the entity to load
12236 * @lst: the return value for the set of parsed nodes
12237 *
12238 * Parse an external general entity
12239 * An external general parsed entity is well-formed if it matches the
12240 * production labeled extParsedEnt.
12241 *
12242 * [78] extParsedEnt ::= TextDecl? content
12243 *
12244 * Returns 0 if the entity is well formed, -1 in case of args problem and
12245 * the parser error code otherwise
12246 */
12247
12248 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12249 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12250 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12251 xmlParserCtxtPtr ctxt;
12252 int ret;
12253
12254 if (list != NULL)
12255 *list = NULL;
12256
12257 if (doc == NULL)
12258 return(XML_ERR_ARGUMENT);
12259
12260 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12261 if (ctxt == NULL)
12262 return(XML_ERR_NO_MEMORY);
12263
12264 ctxt->depth = depth;
12265 ctxt->myDoc = doc;
12266 ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12267
12268 xmlFreeParserCtxt(ctxt);
12269 return(ret);
12270 }
12271
12272 /**
12273 * xmlParseBalancedChunkMemory:
12274 * @doc: the document the chunk pertains to (must not be NULL)
12275 * @sax: the SAX handler block (possibly NULL)
12276 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12277 * @depth: Used for loop detection, use 0
12278 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12279 * @lst: the return value for the set of parsed nodes
12280 *
12281 * Parse a well-balanced chunk of an XML document
12282 * called by the parser
12283 * The allowed sequence for the Well Balanced Chunk is the one defined by
12284 * the content production in the XML grammar:
12285 *
12286 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12287 *
12288 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12289 * the parser error code otherwise
12290 */
12291
12292 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12293 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12294 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12295 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12296 depth, string, lst, 0 );
12297 }
12298 #endif /* LIBXML_SAX1_ENABLED */
12299
12300 /**
12301 * xmlParseInNodeContext:
12302 * @node: the context node
12303 * @data: the input string
12304 * @datalen: the input string length in bytes
12305 * @options: a combination of xmlParserOption
12306 * @lst: the return value for the set of parsed nodes
12307 *
12308 * Parse a well-balanced chunk of an XML document
12309 * within the context (DTD, namespaces, etc ...) of the given node.
12310 *
12311 * The allowed sequence for the data is a Well Balanced Chunk defined by
12312 * the content production in the XML grammar:
12313 *
12314 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12315 *
12316 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12317 * error code otherwise
12318 */
12319 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12320 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12321 int options, xmlNodePtr *lst) {
12322 xmlParserCtxtPtr ctxt;
12323 xmlDocPtr doc = NULL;
12324 xmlNodePtr fake, cur;
12325 int nsnr = 0;
12326
12327 xmlParserErrors ret = XML_ERR_OK;
12328
12329 /*
12330 * check all input parameters, grab the document
12331 */
12332 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12333 return(XML_ERR_ARGUMENT);
12334 switch (node->type) {
12335 case XML_ELEMENT_NODE:
12336 case XML_ATTRIBUTE_NODE:
12337 case XML_TEXT_NODE:
12338 case XML_CDATA_SECTION_NODE:
12339 case XML_ENTITY_REF_NODE:
12340 case XML_PI_NODE:
12341 case XML_COMMENT_NODE:
12342 case XML_DOCUMENT_NODE:
12343 case XML_HTML_DOCUMENT_NODE:
12344 break;
12345 default:
12346 return(XML_ERR_INTERNAL_ERROR);
12347
12348 }
12349 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12350 (node->type != XML_DOCUMENT_NODE) &&
12351 (node->type != XML_HTML_DOCUMENT_NODE))
12352 node = node->parent;
12353 if (node == NULL)
12354 return(XML_ERR_INTERNAL_ERROR);
12355 if (node->type == XML_ELEMENT_NODE)
12356 doc = node->doc;
12357 else
12358 doc = (xmlDocPtr) node;
12359 if (doc == NULL)
12360 return(XML_ERR_INTERNAL_ERROR);
12361
12362 /*
12363 * allocate a context and set-up everything not related to the
12364 * node position in the tree
12365 */
12366 if (doc->type == XML_DOCUMENT_NODE)
12367 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12368 #ifdef LIBXML_HTML_ENABLED
12369 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12370 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12371 /*
12372 * When parsing in context, it makes no sense to add implied
12373 * elements like html/body/etc...
12374 */
12375 options |= HTML_PARSE_NOIMPLIED;
12376 }
12377 #endif
12378 else
12379 return(XML_ERR_INTERNAL_ERROR);
12380
12381 if (ctxt == NULL)
12382 return(XML_ERR_NO_MEMORY);
12383
12384 /*
12385 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12386 * We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
12387 * we must wait until the last moment to free the original one.
12388 */
12389 if (doc->dict != NULL) {
12390 if (ctxt->dict != NULL)
12391 xmlDictFree(ctxt->dict);
12392 ctxt->dict = doc->dict;
12393 } else {
12394 options |= XML_PARSE_NODICT;
12395 ctxt->dictNames = 0;
12396 }
12397
12398 if (doc->encoding != NULL)
12399 xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
12400
12401 xmlCtxtUseOptions(ctxt, options);
12402 xmlCtxtInitializeLate(ctxt);
12403 ctxt->myDoc = doc;
12404 /* parsing in context, i.e. as within existing content */
12405 ctxt->input_id = 2;
12406
12407 /*
12408 * TODO: Use xmlCtxtParseContent
12409 */
12410
12411 fake = xmlNewDocComment(node->doc, NULL);
12412 if (fake == NULL) {
12413 xmlFreeParserCtxt(ctxt);
12414 return(XML_ERR_NO_MEMORY);
12415 }
12416 xmlAddChild(node, fake);
12417
12418 if (node->type == XML_ELEMENT_NODE)
12419 nodePush(ctxt, node);
12420
12421 if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
12422 /*
12423 * initialize the SAX2 namespaces stack
12424 */
12425 cur = node;
12426 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12427 xmlNsPtr ns = cur->nsDef;
12428 xmlHashedString hprefix, huri;
12429
12430 while (ns != NULL) {
12431 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12432 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12433 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12434 nsnr++;
12435 ns = ns->next;
12436 }
12437 cur = cur->parent;
12438 }
12439 }
12440
12441 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12442 /*
12443 * ID/IDREF registration will be done in xmlValidateElement below
12444 */
12445 ctxt->loadsubset |= XML_SKIP_IDS;
12446 }
12447
12448 #ifdef LIBXML_HTML_ENABLED
12449 if (doc->type == XML_HTML_DOCUMENT_NODE)
12450 __htmlParseContent(ctxt);
12451 else
12452 #endif
12453 xmlParseContentInternal(ctxt);
12454
12455 if (ctxt->input->cur < ctxt->input->end)
12456 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12457
12458 xmlParserNsPop(ctxt, nsnr);
12459
12460 if ((ctxt->wellFormed) ||
12461 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12462 ret = XML_ERR_OK;
12463 } else {
12464 ret = (xmlParserErrors) ctxt->errNo;
12465 }
12466
12467 /*
12468 * Return the newly created nodeset after unlinking it from
12469 * the pseudo sibling.
12470 */
12471
12472 cur = fake->next;
12473 fake->next = NULL;
12474 node->last = fake;
12475
12476 if (cur != NULL) {
12477 cur->prev = NULL;
12478 }
12479
12480 *lst = cur;
12481
12482 while (cur != NULL) {
12483 cur->parent = NULL;
12484 cur = cur->next;
12485 }
12486
12487 xmlUnlinkNode(fake);
12488 xmlFreeNode(fake);
12489
12490
12491 if (ret != XML_ERR_OK) {
12492 xmlFreeNodeList(*lst);
12493 *lst = NULL;
12494 }
12495
12496 if (doc->dict != NULL)
12497 ctxt->dict = NULL;
12498 xmlFreeParserCtxt(ctxt);
12499
12500 return(ret);
12501 }
12502
12503 #ifdef LIBXML_SAX1_ENABLED
12504 /**
12505 * xmlParseBalancedChunkMemoryRecover:
12506 * @doc: the document the chunk pertains to (must not be NULL)
12507 * @sax: the SAX handler block (possibly NULL)
12508 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12509 * @depth: Used for loop detection, use 0
12510 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12511 * @list: the return value for the set of parsed nodes
12512 * @recover: return nodes even if the data is broken (use 0)
12513 *
12514 * Parse a well-balanced chunk of an XML document
12515 *
12516 * The allowed sequence for the Well Balanced Chunk is the one defined by
12517 * the content production in the XML grammar:
12518 *
12519 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12520 *
12521 * Returns 0 if the chunk is well balanced, or thehe parser error code
12522 * otherwise.
12523 *
12524 * In case recover is set to 1, the nodelist will not be empty even if
12525 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12526 * some extent.
12527 */
12528 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * listOut,int recover)12529 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12530 void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12531 int recover) {
12532 xmlParserCtxtPtr ctxt;
12533 xmlParserInputPtr input;
12534 xmlNodePtr list;
12535 int ret;
12536
12537 if (listOut != NULL)
12538 *listOut = NULL;
12539
12540 if (string == NULL)
12541 return(XML_ERR_ARGUMENT);
12542
12543 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12544 if (ctxt == NULL)
12545 return(XML_ERR_NO_MEMORY);
12546
12547 xmlCtxtInitializeLate(ctxt);
12548
12549 ctxt->depth = depth;
12550 ctxt->myDoc = doc;
12551 if (recover) {
12552 ctxt->options |= XML_PARSE_RECOVER;
12553 ctxt->recovery = 1;
12554 }
12555
12556 input = xmlNewStringInputStream(ctxt, string);
12557 if (input == NULL)
12558 return(ctxt->errNo);
12559
12560 list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 0, 1);
12561 if (listOut != NULL)
12562 *listOut = list;
12563 else
12564 xmlFreeNodeList(list);
12565
12566 ret = ctxt->errNo;
12567
12568 xmlFreeInputStream(input);
12569 xmlFreeParserCtxt(ctxt);
12570 return(ret);
12571 }
12572
12573 /**
12574 * xmlSAXParseEntity:
12575 * @sax: the SAX handler block
12576 * @filename: the filename
12577 *
12578 * DEPRECATED: Don't use.
12579 *
12580 * parse an XML external entity out of context and build a tree.
12581 * It use the given SAX function block to handle the parsing callback.
12582 * If sax is NULL, fallback to the default DOM tree building routines.
12583 *
12584 * [78] extParsedEnt ::= TextDecl? content
12585 *
12586 * This correspond to a "Well Balanced" chunk
12587 *
12588 * Returns the resulting document tree
12589 */
12590
12591 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)12592 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12593 xmlDocPtr ret;
12594 xmlParserCtxtPtr ctxt;
12595
12596 ctxt = xmlCreateFileParserCtxt(filename);
12597 if (ctxt == NULL) {
12598 return(NULL);
12599 }
12600 if (sax != NULL) {
12601 if (sax->initialized == XML_SAX2_MAGIC) {
12602 *ctxt->sax = *sax;
12603 } else {
12604 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12605 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12606 }
12607 ctxt->userData = NULL;
12608 }
12609
12610 xmlParseExtParsedEnt(ctxt);
12611
12612 if (ctxt->wellFormed) {
12613 ret = ctxt->myDoc;
12614 } else {
12615 ret = NULL;
12616 xmlFreeDoc(ctxt->myDoc);
12617 }
12618
12619 xmlFreeParserCtxt(ctxt);
12620
12621 return(ret);
12622 }
12623
12624 /**
12625 * xmlParseEntity:
12626 * @filename: the filename
12627 *
12628 * parse an XML external entity out of context and build a tree.
12629 *
12630 * [78] extParsedEnt ::= TextDecl? content
12631 *
12632 * This correspond to a "Well Balanced" chunk
12633 *
12634 * Returns the resulting document tree
12635 */
12636
12637 xmlDocPtr
xmlParseEntity(const char * filename)12638 xmlParseEntity(const char *filename) {
12639 return(xmlSAXParseEntity(NULL, filename));
12640 }
12641 #endif /* LIBXML_SAX1_ENABLED */
12642
12643 /**
12644 * xmlCreateEntityParserCtxt:
12645 * @URL: the entity URL
12646 * @ID: the entity PUBLIC ID
12647 * @base: a possible base for the target URI
12648 *
12649 * DEPRECATED: Use xmlNewInputURL.
12650 *
12651 * Create a parser context for an external entity
12652 * Automatic support for ZLIB/Compress compressed document is provided
12653 * by default if found at compile-time.
12654 *
12655 * Returns the new parser context or NULL
12656 */
12657 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)12658 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12659 const xmlChar *base) {
12660 xmlParserCtxtPtr ctxt;
12661 xmlParserInputPtr input;
12662 xmlChar *uri = NULL;
12663
12664 ctxt = xmlNewParserCtxt();
12665 if (ctxt == NULL)
12666 return(NULL);
12667
12668 if (base != NULL) {
12669 if (xmlBuildURISafe(URL, base, &uri) < 0)
12670 goto error;
12671 if (uri != NULL)
12672 URL = uri;
12673 }
12674
12675 input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12676 if (input == NULL)
12677 goto error;
12678
12679 if (inputPush(ctxt, input) < 0)
12680 goto error;
12681
12682 xmlFree(uri);
12683 return(ctxt);
12684
12685 error:
12686 xmlFree(uri);
12687 xmlFreeParserCtxt(ctxt);
12688 return(NULL);
12689 }
12690
12691 /************************************************************************
12692 * *
12693 * Front ends when parsing from a file *
12694 * *
12695 ************************************************************************/
12696
12697 /**
12698 * xmlCreateURLParserCtxt:
12699 * @filename: the filename or URL
12700 * @options: a combination of xmlParserOption
12701 *
12702 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12703 *
12704 * Create a parser context for a file or URL content.
12705 * Automatic support for ZLIB/Compress compressed document is provided
12706 * by default if found at compile-time and for file accesses
12707 *
12708 * Returns the new parser context or NULL
12709 */
12710 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)12711 xmlCreateURLParserCtxt(const char *filename, int options)
12712 {
12713 xmlParserCtxtPtr ctxt;
12714 xmlParserInputPtr input;
12715
12716 ctxt = xmlNewParserCtxt();
12717 if (ctxt == NULL)
12718 return(NULL);
12719
12720 xmlCtxtUseOptions(ctxt, options);
12721 ctxt->linenumbers = 1;
12722
12723 input = xmlLoadExternalEntity(filename, NULL, ctxt);
12724 if (input == NULL) {
12725 xmlFreeParserCtxt(ctxt);
12726 return(NULL);
12727 }
12728 inputPush(ctxt, input);
12729
12730 return(ctxt);
12731 }
12732
12733 /**
12734 * xmlCreateFileParserCtxt:
12735 * @filename: the filename
12736 *
12737 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12738 *
12739 * Create a parser context for a file content.
12740 * Automatic support for ZLIB/Compress compressed document is provided
12741 * by default if found at compile-time.
12742 *
12743 * Returns the new parser context or NULL
12744 */
12745 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)12746 xmlCreateFileParserCtxt(const char *filename)
12747 {
12748 return(xmlCreateURLParserCtxt(filename, 0));
12749 }
12750
12751 #ifdef LIBXML_SAX1_ENABLED
12752 /**
12753 * xmlSAXParseFileWithData:
12754 * @sax: the SAX handler block
12755 * @filename: the filename
12756 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12757 * documents
12758 * @data: the userdata
12759 *
12760 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12761 *
12762 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12763 * compressed document is provided by default if found at compile-time.
12764 * It use the given SAX function block to handle the parsing callback.
12765 * If sax is NULL, fallback to the default DOM tree building routines.
12766 *
12767 * User data (void *) is stored within the parser context in the
12768 * context's _private member, so it is available nearly everywhere in libxml
12769 *
12770 * Returns the resulting document tree
12771 */
12772
12773 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)12774 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12775 int recovery, void *data) {
12776 xmlDocPtr ret;
12777 xmlParserCtxtPtr ctxt;
12778 xmlParserInputPtr input;
12779
12780 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12781 if (ctxt == NULL)
12782 return(NULL);
12783
12784 if (data != NULL)
12785 ctxt->_private = data;
12786
12787 if (recovery) {
12788 ctxt->options |= XML_PARSE_RECOVER;
12789 ctxt->recovery = 1;
12790 }
12791
12792 input = xmlNewInputURL(ctxt, filename, NULL, NULL, 0);
12793
12794 ret = xmlCtxtParseDocument(ctxt, input);
12795
12796 xmlFreeParserCtxt(ctxt);
12797 return(ret);
12798 }
12799
12800 /**
12801 * xmlSAXParseFile:
12802 * @sax: the SAX handler block
12803 * @filename: the filename
12804 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12805 * documents
12806 *
12807 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12808 *
12809 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12810 * compressed document is provided by default if found at compile-time.
12811 * It use the given SAX function block to handle the parsing callback.
12812 * If sax is NULL, fallback to the default DOM tree building routines.
12813 *
12814 * Returns the resulting document tree
12815 */
12816
12817 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)12818 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12819 int recovery) {
12820 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12821 }
12822
12823 /**
12824 * xmlRecoverDoc:
12825 * @cur: a pointer to an array of xmlChar
12826 *
12827 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12828 *
12829 * parse an XML in-memory document and build a tree.
12830 * In the case the document is not Well Formed, a attempt to build a
12831 * tree is tried anyway
12832 *
12833 * Returns the resulting document tree or NULL in case of failure
12834 */
12835
12836 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)12837 xmlRecoverDoc(const xmlChar *cur) {
12838 return(xmlSAXParseDoc(NULL, cur, 1));
12839 }
12840
12841 /**
12842 * xmlParseFile:
12843 * @filename: the filename
12844 *
12845 * DEPRECATED: Use xmlReadFile.
12846 *
12847 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12848 * compressed document is provided by default if found at compile-time.
12849 *
12850 * Returns the resulting document tree if the file was wellformed,
12851 * NULL otherwise.
12852 */
12853
12854 xmlDocPtr
xmlParseFile(const char * filename)12855 xmlParseFile(const char *filename) {
12856 return(xmlSAXParseFile(NULL, filename, 0));
12857 }
12858
12859 /**
12860 * xmlRecoverFile:
12861 * @filename: the filename
12862 *
12863 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12864 *
12865 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12866 * compressed document is provided by default if found at compile-time.
12867 * In the case the document is not Well Formed, it attempts to build
12868 * a tree anyway
12869 *
12870 * Returns the resulting document tree or NULL in case of failure
12871 */
12872
12873 xmlDocPtr
xmlRecoverFile(const char * filename)12874 xmlRecoverFile(const char *filename) {
12875 return(xmlSAXParseFile(NULL, filename, 1));
12876 }
12877
12878
12879 /**
12880 * xmlSetupParserForBuffer:
12881 * @ctxt: an XML parser context
12882 * @buffer: a xmlChar * buffer
12883 * @filename: a file name
12884 *
12885 * DEPRECATED: Don't use.
12886 *
12887 * Setup the parser context to parse a new buffer; Clears any prior
12888 * contents from the parser context. The buffer parameter must not be
12889 * NULL, but the filename parameter can be
12890 */
12891 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)12892 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12893 const char* filename)
12894 {
12895 xmlParserInputPtr input;
12896
12897 if ((ctxt == NULL) || (buffer == NULL))
12898 return;
12899
12900 xmlClearParserCtxt(ctxt);
12901
12902 input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0);
12903 if (input == NULL)
12904 return;
12905 inputPush(ctxt, input);
12906 }
12907
12908 /**
12909 * xmlSAXUserParseFile:
12910 * @sax: a SAX handler
12911 * @user_data: The user data returned on SAX callbacks
12912 * @filename: a file name
12913 *
12914 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12915 *
12916 * parse an XML file and call the given SAX handler routines.
12917 * Automatic support for ZLIB/Compress compressed document is provided
12918 *
12919 * Returns 0 in case of success or a error number otherwise
12920 */
12921 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)12922 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12923 const char *filename) {
12924 int ret = 0;
12925 xmlParserCtxtPtr ctxt;
12926
12927 ctxt = xmlCreateFileParserCtxt(filename);
12928 if (ctxt == NULL) return -1;
12929 if (sax != NULL) {
12930 if (sax->initialized == XML_SAX2_MAGIC) {
12931 *ctxt->sax = *sax;
12932 } else {
12933 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12934 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12935 }
12936 ctxt->userData = user_data;
12937 }
12938
12939 xmlParseDocument(ctxt);
12940
12941 if (ctxt->wellFormed)
12942 ret = 0;
12943 else {
12944 if (ctxt->errNo != 0)
12945 ret = ctxt->errNo;
12946 else
12947 ret = -1;
12948 }
12949 if (ctxt->myDoc != NULL) {
12950 xmlFreeDoc(ctxt->myDoc);
12951 ctxt->myDoc = NULL;
12952 }
12953 xmlFreeParserCtxt(ctxt);
12954
12955 return ret;
12956 }
12957 #endif /* LIBXML_SAX1_ENABLED */
12958
12959 /************************************************************************
12960 * *
12961 * Front ends when parsing from memory *
12962 * *
12963 ************************************************************************/
12964
12965 /**
12966 * xmlCreateMemoryParserCtxt:
12967 * @buffer: a pointer to a char array
12968 * @size: the size of the array
12969 *
12970 * Create a parser context for an XML in-memory document. The input buffer
12971 * must not contain a terminating null byte.
12972 *
12973 * Returns the new parser context or NULL
12974 */
12975 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)12976 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12977 xmlParserCtxtPtr ctxt;
12978 xmlParserInputPtr input;
12979
12980 if (size < 0)
12981 return(NULL);
12982
12983 ctxt = xmlNewParserCtxt();
12984 if (ctxt == NULL)
12985 return(NULL);
12986
12987 input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0);
12988 if (input == NULL) {
12989 xmlFreeParserCtxt(ctxt);
12990 return(NULL);
12991 }
12992 inputPush(ctxt, input);
12993
12994 return(ctxt);
12995 }
12996
12997 #ifdef LIBXML_SAX1_ENABLED
12998 /**
12999 * xmlSAXParseMemoryWithData:
13000 * @sax: the SAX handler block
13001 * @buffer: an pointer to a char array
13002 * @size: the size of the array
13003 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13004 * documents
13005 * @data: the userdata
13006 *
13007 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13008 *
13009 * parse an XML in-memory block and use the given SAX function block
13010 * to handle the parsing callback. If sax is NULL, fallback to the default
13011 * DOM tree building routines.
13012 *
13013 * User data (void *) is stored within the parser context in the
13014 * context's _private member, so it is available nearly everywhere in libxml
13015 *
13016 * Returns the resulting document tree
13017 */
13018
13019 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)13020 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13021 int size, int recovery, void *data) {
13022 xmlDocPtr ret;
13023 xmlParserCtxtPtr ctxt;
13024 xmlParserInputPtr input;
13025
13026 if (size < 0)
13027 return(NULL);
13028
13029 ctxt = xmlNewSAXParserCtxt(sax, NULL);
13030 if (ctxt == NULL)
13031 return(NULL);
13032
13033 if (data != NULL)
13034 ctxt->_private=data;
13035
13036 if (recovery) {
13037 ctxt->options |= XML_PARSE_RECOVER;
13038 ctxt->recovery = 1;
13039 }
13040
13041 input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL,
13042 XML_INPUT_BUF_STATIC);
13043
13044 ret = xmlCtxtParseDocument(ctxt, input);
13045
13046 xmlFreeParserCtxt(ctxt);
13047 return(ret);
13048 }
13049
13050 /**
13051 * xmlSAXParseMemory:
13052 * @sax: the SAX handler block
13053 * @buffer: an pointer to a char array
13054 * @size: the size of the array
13055 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13056 * documents
13057 *
13058 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13059 *
13060 * parse an XML in-memory block and use the given SAX function block
13061 * to handle the parsing callback. If sax is NULL, fallback to the default
13062 * DOM tree building routines.
13063 *
13064 * Returns the resulting document tree
13065 */
13066 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13067 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13068 int size, int recovery) {
13069 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13070 }
13071
13072 /**
13073 * xmlParseMemory:
13074 * @buffer: an pointer to a char array
13075 * @size: the size of the array
13076 *
13077 * DEPRECATED: Use xmlReadMemory.
13078 *
13079 * parse an XML in-memory block and build a tree.
13080 *
13081 * Returns the resulting document tree
13082 */
13083
xmlParseMemory(const char * buffer,int size)13084 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13085 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13086 }
13087
13088 /**
13089 * xmlRecoverMemory:
13090 * @buffer: an pointer to a char array
13091 * @size: the size of the array
13092 *
13093 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13094 *
13095 * parse an XML in-memory block and build a tree.
13096 * In the case the document is not Well Formed, an attempt to
13097 * build a tree is tried anyway
13098 *
13099 * Returns the resulting document tree or NULL in case of error
13100 */
13101
xmlRecoverMemory(const char * buffer,int size)13102 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13103 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13104 }
13105
13106 /**
13107 * xmlSAXUserParseMemory:
13108 * @sax: a SAX handler
13109 * @user_data: The user data returned on SAX callbacks
13110 * @buffer: an in-memory XML document input
13111 * @size: the length of the XML document in bytes
13112 *
13113 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13114 *
13115 * parse an XML in-memory buffer and call the given SAX handler routines.
13116 *
13117 * Returns 0 in case of success or a error number otherwise
13118 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13119 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13120 const char *buffer, int size) {
13121 int ret = 0;
13122 xmlParserCtxtPtr ctxt;
13123
13124 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13125 if (ctxt == NULL) return -1;
13126 if (sax != NULL) {
13127 if (sax->initialized == XML_SAX2_MAGIC) {
13128 *ctxt->sax = *sax;
13129 } else {
13130 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13131 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13132 }
13133 ctxt->userData = user_data;
13134 }
13135
13136 xmlParseDocument(ctxt);
13137
13138 if (ctxt->wellFormed)
13139 ret = 0;
13140 else {
13141 if (ctxt->errNo != 0)
13142 ret = ctxt->errNo;
13143 else
13144 ret = -1;
13145 }
13146 if (ctxt->myDoc != NULL) {
13147 xmlFreeDoc(ctxt->myDoc);
13148 ctxt->myDoc = NULL;
13149 }
13150 xmlFreeParserCtxt(ctxt);
13151
13152 return ret;
13153 }
13154 #endif /* LIBXML_SAX1_ENABLED */
13155
13156 /**
13157 * xmlCreateDocParserCtxt:
13158 * @str: a pointer to an array of xmlChar
13159 *
13160 * Creates a parser context for an XML in-memory document.
13161 *
13162 * Returns the new parser context or NULL
13163 */
13164 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * str)13165 xmlCreateDocParserCtxt(const xmlChar *str) {
13166 xmlParserCtxtPtr ctxt;
13167 xmlParserInputPtr input;
13168
13169 ctxt = xmlNewParserCtxt();
13170 if (ctxt == NULL)
13171 return(NULL);
13172
13173 input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0);
13174 if (input == NULL) {
13175 xmlFreeParserCtxt(ctxt);
13176 return(NULL);
13177 }
13178 inputPush(ctxt, input);
13179
13180 return(ctxt);
13181 }
13182
13183 #ifdef LIBXML_SAX1_ENABLED
13184 /**
13185 * xmlSAXParseDoc:
13186 * @sax: the SAX handler block
13187 * @cur: a pointer to an array of xmlChar
13188 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13189 * documents
13190 *
13191 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13192 *
13193 * parse an XML in-memory document and build a tree.
13194 * It use the given SAX function block to handle the parsing callback.
13195 * If sax is NULL, fallback to the default DOM tree building routines.
13196 *
13197 * Returns the resulting document tree
13198 */
13199
13200 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)13201 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13202 xmlDocPtr ret;
13203 xmlParserCtxtPtr ctxt;
13204 xmlSAXHandlerPtr oldsax = NULL;
13205
13206 if (cur == NULL) return(NULL);
13207
13208
13209 ctxt = xmlCreateDocParserCtxt(cur);
13210 if (ctxt == NULL) return(NULL);
13211 if (sax != NULL) {
13212 oldsax = ctxt->sax;
13213 ctxt->sax = sax;
13214 ctxt->userData = NULL;
13215 }
13216
13217 xmlParseDocument(ctxt);
13218 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13219 else {
13220 ret = NULL;
13221 xmlFreeDoc(ctxt->myDoc);
13222 ctxt->myDoc = NULL;
13223 }
13224 if (sax != NULL)
13225 ctxt->sax = oldsax;
13226 xmlFreeParserCtxt(ctxt);
13227
13228 return(ret);
13229 }
13230
13231 /**
13232 * xmlParseDoc:
13233 * @cur: a pointer to an array of xmlChar
13234 *
13235 * DEPRECATED: Use xmlReadDoc.
13236 *
13237 * parse an XML in-memory document and build a tree.
13238 *
13239 * Returns the resulting document tree
13240 */
13241
13242 xmlDocPtr
xmlParseDoc(const xmlChar * cur)13243 xmlParseDoc(const xmlChar *cur) {
13244 return(xmlSAXParseDoc(NULL, cur, 0));
13245 }
13246 #endif /* LIBXML_SAX1_ENABLED */
13247
13248 /************************************************************************
13249 * *
13250 * New set (2.6.0) of simpler and more flexible APIs *
13251 * *
13252 ************************************************************************/
13253
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can be used safely as the body of an if/else;
 * call sites must terminate it with a semicolon.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
	        (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	        xmlFree((char *)(str));				\
	} while (0)
13265
13266 /**
13267 * xmlCtxtReset:
13268 * @ctxt: an XML parser context
13269 *
13270 * Reset a parser context
13271 */
13272 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)13273 xmlCtxtReset(xmlParserCtxtPtr ctxt)
13274 {
13275 xmlParserInputPtr input;
13276 xmlDictPtr dict;
13277
13278 if (ctxt == NULL)
13279 return;
13280
13281 dict = ctxt->dict;
13282
13283 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13284 xmlFreeInputStream(input);
13285 }
13286 ctxt->inputNr = 0;
13287 ctxt->input = NULL;
13288
13289 ctxt->spaceNr = 0;
13290 if (ctxt->spaceTab != NULL) {
13291 ctxt->spaceTab[0] = -1;
13292 ctxt->space = &ctxt->spaceTab[0];
13293 } else {
13294 ctxt->space = NULL;
13295 }
13296
13297
13298 ctxt->nodeNr = 0;
13299 ctxt->node = NULL;
13300
13301 ctxt->nameNr = 0;
13302 ctxt->name = NULL;
13303
13304 ctxt->nsNr = 0;
13305 xmlParserNsReset(ctxt->nsdb);
13306
13307 DICT_FREE(ctxt->version);
13308 ctxt->version = NULL;
13309 DICT_FREE(ctxt->encoding);
13310 ctxt->encoding = NULL;
13311 DICT_FREE(ctxt->extSubURI);
13312 ctxt->extSubURI = NULL;
13313 DICT_FREE(ctxt->extSubSystem);
13314 ctxt->extSubSystem = NULL;
13315 if (ctxt->myDoc != NULL)
13316 xmlFreeDoc(ctxt->myDoc);
13317 ctxt->myDoc = NULL;
13318
13319 ctxt->standalone = -1;
13320 ctxt->hasExternalSubset = 0;
13321 ctxt->hasPErefs = 0;
13322 ctxt->html = 0;
13323 ctxt->instate = XML_PARSER_START;
13324
13325 ctxt->wellFormed = 1;
13326 ctxt->nsWellFormed = 1;
13327 ctxt->disableSAX = 0;
13328 ctxt->valid = 1;
13329 #if 0
13330 ctxt->vctxt.userData = ctxt;
13331 ctxt->vctxt.error = xmlParserValidityError;
13332 ctxt->vctxt.warning = xmlParserValidityWarning;
13333 #endif
13334 ctxt->record_info = 0;
13335 ctxt->checkIndex = 0;
13336 ctxt->endCheckState = 0;
13337 ctxt->inSubset = 0;
13338 ctxt->errNo = XML_ERR_OK;
13339 ctxt->depth = 0;
13340 ctxt->catalogs = NULL;
13341 ctxt->sizeentities = 0;
13342 ctxt->sizeentcopy = 0;
13343 xmlInitNodeInfoSeq(&ctxt->node_seq);
13344
13345 if (ctxt->attsDefault != NULL) {
13346 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13347 ctxt->attsDefault = NULL;
13348 }
13349 if (ctxt->attsSpecial != NULL) {
13350 xmlHashFree(ctxt->attsSpecial, NULL);
13351 ctxt->attsSpecial = NULL;
13352 }
13353
13354 #ifdef LIBXML_CATALOG_ENABLED
13355 if (ctxt->catalogs != NULL)
13356 xmlCatalogFreeLocal(ctxt->catalogs);
13357 #endif
13358 ctxt->nbErrors = 0;
13359 ctxt->nbWarnings = 0;
13360 if (ctxt->lastError.code != XML_ERR_OK)
13361 xmlResetError(&ctxt->lastError);
13362 }
13363
13364 /**
13365 * xmlCtxtResetPush:
13366 * @ctxt: an XML parser context
13367 * @chunk: a pointer to an array of chars
13368 * @size: number of chars in the array
13369 * @filename: an optional file name or URI
13370 * @encoding: the document encoding, or NULL
13371 *
13372 * Reset a push parser context
13373 *
13374 * Returns 0 in case of success and 1 in case of error
13375 */
13376 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)13377 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13378 int size, const char *filename, const char *encoding)
13379 {
13380 xmlParserInputPtr input;
13381
13382 if (ctxt == NULL)
13383 return(1);
13384
13385 xmlCtxtReset(ctxt);
13386
13387 input = xmlNewInputPush(ctxt, filename, chunk, size, encoding);
13388 if (input == NULL)
13389 return(1);
13390 inputPush(ctxt, input);
13391
13392 return(0);
13393 }
13394
13395 static int
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt,int options,int keepMask)13396 xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13397 {
13398 int allMask;
13399
13400 if (ctxt == NULL)
13401 return(-1);
13402
13403 /*
13404 * XInclude options aren't handled by the parser.
13405 *
13406 * XML_PARSE_XINCLUDE
13407 * XML_PARSE_NOXINCNODE
13408 * XML_PARSE_NOBASEFIX
13409 */
13410 allMask = XML_PARSE_RECOVER |
13411 XML_PARSE_NOENT |
13412 XML_PARSE_DTDLOAD |
13413 XML_PARSE_DTDATTR |
13414 XML_PARSE_DTDVALID |
13415 XML_PARSE_NOERROR |
13416 XML_PARSE_NOWARNING |
13417 XML_PARSE_PEDANTIC |
13418 XML_PARSE_NOBLANKS |
13419 #ifdef LIBXML_SAX1_ENABLED
13420 XML_PARSE_SAX1 |
13421 #endif
13422 XML_PARSE_NONET |
13423 XML_PARSE_NODICT |
13424 XML_PARSE_NSCLEAN |
13425 XML_PARSE_NOCDATA |
13426 XML_PARSE_COMPACT |
13427 XML_PARSE_OLD10 |
13428 XML_PARSE_HUGE |
13429 XML_PARSE_OLDSAX |
13430 XML_PARSE_IGNORE_ENC |
13431 XML_PARSE_BIG_LINES |
13432 XML_PARSE_NO_XXE;
13433
13434 ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13435
13436 /*
13437 * For some options, struct members are historically the source
13438 * of truth. The values are initalized from global variables and
13439 * old code could also modify them directly. Several older API
13440 * functions that don't take an options argument rely on these
13441 * deprecated mechanisms.
13442 *
13443 * Once public access to struct members and the globals are
13444 * disabled, we can use the options bitmask as source of
13445 * truth, making all these struct members obsolete.
13446 *
13447 * The XML_DETECT_IDS flags is misnamed. It simply enables
13448 * loading of the external subset.
13449 */
13450 ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13451 ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13452 ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13453 ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13454 ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13455 ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13456 ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13457 ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13458
13459 /*
13460 * Changing SAX callbacks is a bad idea. This should be fixed.
13461 */
13462 if (options & XML_PARSE_NOBLANKS) {
13463 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13464 }
13465 if (options & XML_PARSE_NOCDATA) {
13466 ctxt->sax->cdataBlock = NULL;
13467 }
13468 if (options & XML_PARSE_HUGE) {
13469 if (ctxt->dict != NULL)
13470 xmlDictSetLimit(ctxt->dict, 0);
13471 }
13472
13473 ctxt->linenumbers = 1;
13474
13475 return(options & ~allMask);
13476 }
13477
13478 /**
13479 * xmlCtxtSetOptions:
13480 * @ctxt: an XML parser context
13481 * @options: a bitmask of xmlParserOption values
13482 *
13483 * Applies the options to the parser context. Unset options are
13484 * cleared.
13485 *
13486 * Available since 2.13.0. With older versions, you can use
13487 * xmlCtxtUseOptions.
13488 *
13489 * XML_PARSE_RECOVER
13490 *
13491 * Enable "recovery" mode which allows non-wellformed documents.
13492 * How this mode behaves exactly is unspecified and may change
13493 * without further notice. Use of this feature is DISCOURAGED.
13494 *
13495 * XML_PARSE_NOENT
13496 *
13497 * Despite the confusing name, this option enables substitution
13498 * of entities. The resulting tree won't contain any entity
13499 * reference nodes.
13500 *
13501 * This option also enables loading of external entities (both
13502 * general and parameter entities) which is dangerous. If you
13503 * process untrusted data, it's recommended to set the
13504 * XML_PARSE_NO_XXE option to disable loading of external
13505 * entities.
13506 *
13507 * XML_PARSE_DTDLOAD
13508 *
13509 * Enables loading of an external DTD and the loading and
13510 * substitution of external parameter entities. Has no effect
13511 * if XML_PARSE_NO_XXE is set.
13512 *
13513 * XML_PARSE_DTDATTR
13514 *
13515 * Adds default attributes from the DTD to the result document.
13516 *
13517 * Implies XML_PARSE_DTDLOAD, but loading of external content
13518 * can be disabled with XML_PARSE_NO_XXE.
13519 *
13520 * XML_PARSE_DTDVALID
13521 *
13522 * This option enables DTD validation which requires to load
13523 * external DTDs and external entities (both general and
13524 * parameter entities) unless XML_PARSE_NO_XXE was set.
13525 *
13526 * XML_PARSE_NO_XXE
13527 *
13528 * Disables loading of external DTDs or entities.
13529 *
13530 * XML_PARSE_NOERROR
13531 *
13532 * Disable error and warning reports to the error handlers.
13533 * Errors are still accessible with xmlCtxtGetLastError.
13534 *
13535 * XML_PARSE_NOWARNING
13536 *
13537 * Disable warning reports.
13538 *
13539 * XML_PARSE_PEDANTIC
13540 *
13541 * Enable some pedantic warnings.
13542 *
13543 * XML_PARSE_NOBLANKS
13544 *
13545 * Remove some text nodes containing only whitespace from the
13546 * result document. Which nodes are removed depends on DTD
13547 * element declarations or a conservative heuristic. The
13548 * reindenting feature of the serialization code relies on this
13549 * option to be set when parsing. Use of this option is
13550 * DISCOURAGED.
13551 *
13552 * XML_PARSE_SAX1
13553 *
13554 * Always invoke the deprecated SAX1 startElement and endElement
13555 * handlers. This option is DEPRECATED.
13556 *
13557 * XML_PARSE_NONET
13558 *
13559 * Disable network access with the builtin HTTP and FTP clients.
13560 *
13561 * XML_PARSE_NODICT
13562 *
13563 * Create a document without interned strings, making all
13564 * strings separate memory allocations.
13565 *
13566 * XML_PARSE_NSCLEAN
13567 *
13568 * Remove redundant namespace declarations from the result
13569 * document.
13570 *
13571 * XML_PARSE_NOCDATA
13572 *
13573 * Output normal text nodes instead of CDATA nodes.
13574 *
13575 * XML_PARSE_COMPACT
13576 *
13577 * Store small strings directly in the node struct to save
13578 * memory.
13579 *
13580 * XML_PARSE_OLD10
13581 *
13582 * Use old Name productions from before XML 1.0 Fifth Edition.
13583 * This options is DEPRECATED.
13584 *
13585 * XML_PARSE_HUGE
13586 *
13587 * Relax some internal limits.
13588 *
13589 * Maximum size of text nodes, tags, comments, processing instructions,
13590 * CDATA sections, entity values
13591 *
13592 * normal: 10M
13593 * huge: 1B
13594 *
13595 * Maximum size of names, system literals, pubid literals
13596 *
13597 * normal: 50K
13598 * huge: 10M
13599 *
13600 * Maximum nesting depth of elements
13601 *
13602 * normal: 256
13603 * huge: 2048
13604 *
13605 * Maximum nesting depth of entities
13606 *
13607 * normal: 20
13608 * huge: 40
13609 *
13610 * XML_PARSE_OLDSAX
13611 *
13612 * Enable an unspecified legacy mode for SAX parsers. This
13613 * option is DEPRECATED.
13614 *
13615 * XML_PARSE_IGNORE_ENC
13616 *
13617 * Ignore the encoding in the XML declaration. This option is
13618 * mostly unneeded these days. The only effect is to enforce
13619 * UTF-8 decoding of ASCII-like data.
13620 *
13621 * XML_PARSE_BIG_LINES
13622 *
13623 * Enable reporting of line numbers larger than 65535.
13624 *
13625 * Returns 0 in case of success, the set of unknown or unimplemented options
13626 * in case of error.
13627 */
13628 int
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt,int options)13629 xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13630 {
13631 return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13632 }
13633
13634 /**
13635 * xmlCtxtUseOptions:
13636 * @ctxt: an XML parser context
13637 * @options: a combination of xmlParserOption
13638 *
13639 * DEPRECATED: Use xmlCtxtSetOptions.
13640 *
13641 * Applies the options to the parser context. The following options
13642 * are never cleared and can only be enabled:
13643 *
13644 * XML_PARSE_NOERROR
13645 * XML_PARSE_NOWARNING
13646 * XML_PARSE_NONET
13647 * XML_PARSE_NSCLEAN
13648 * XML_PARSE_NOCDATA
13649 * XML_PARSE_COMPACT
13650 * XML_PARSE_OLD10
13651 * XML_PARSE_HUGE
13652 * XML_PARSE_OLDSAX
13653 * XML_PARSE_IGNORE_ENC
13654 * XML_PARSE_BIG_LINES
13655 *
13656 * Returns 0 in case of success, the set of unknown or unimplemented options
13657 * in case of error.
13658 */
13659 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)13660 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13661 {
13662 int keepMask;
13663
13664 /*
13665 * For historic reasons, some options can only be enabled.
13666 */
13667 keepMask = XML_PARSE_NOERROR |
13668 XML_PARSE_NOWARNING |
13669 XML_PARSE_NONET |
13670 XML_PARSE_NSCLEAN |
13671 XML_PARSE_NOCDATA |
13672 XML_PARSE_COMPACT |
13673 XML_PARSE_OLD10 |
13674 XML_PARSE_HUGE |
13675 XML_PARSE_OLDSAX |
13676 XML_PARSE_IGNORE_ENC |
13677 XML_PARSE_BIG_LINES;
13678
13679 return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13680 }
13681
13682 /**
13683 * xmlCtxtSetMaxAmplification:
13684 * @ctxt: an XML parser context
13685 * @maxAmpl: maximum amplification factor
13686 *
13687 * To protect against exponential entity expansion ("billion laughs"), the
13688 * size of serialized output is (roughly) limited to the input size
13689 * multiplied by this factor. The default value is 5.
13690 *
13691 * When working with documents making heavy use of entity expansion, it can
13692 * be necessary to increase the value. For security reasons, this should only
13693 * be considered when processing trusted input.
13694 */
13695 void
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt,unsigned maxAmpl)13696 xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13697 {
13698 ctxt->maxAmpl = maxAmpl;
13699 }
13700
13701 /**
13702 * xmlCtxtParseDocument:
13703 * @ctxt: an XML parser context
13704 * @input: parser input
13705 *
13706 * Parse an XML document and return the resulting document tree.
13707 * Takes ownership of the input object.
13708 *
13709 * Returns the resulting document tree or NULL
13710 */
13711 xmlDocPtr
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)13712 xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13713 {
13714 xmlDocPtr ret = NULL;
13715
13716 if ((ctxt == NULL) || (input == NULL))
13717 return(NULL);
13718
13719 /* assert(ctxt->inputNr == 0); */
13720 while (ctxt->inputNr > 0)
13721 xmlFreeInputStream(inputPop(ctxt));
13722
13723 if (inputPush(ctxt, input) < 0) {
13724 xmlFreeInputStream(input);
13725 return(NULL);
13726 }
13727
13728 xmlParseDocument(ctxt);
13729
13730 if ((ctxt->wellFormed) ||
13731 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13732 ret = ctxt->myDoc;
13733 } else {
13734 if (ctxt->errNo == XML_ERR_OK)
13735 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13736
13737 ret = NULL;
13738 xmlFreeDoc(ctxt->myDoc);
13739 }
13740 ctxt->myDoc = NULL;
13741
13742 /* assert(ctxt->inputNr == 1); */
13743 while (ctxt->inputNr > 0)
13744 xmlFreeInputStream(inputPop(ctxt));
13745
13746 return(ret);
13747 }
13748
13749 /**
13750 * xmlReadDoc:
13751 * @cur: a pointer to a zero terminated string
13752 * @URL: base URL (optional)
13753 * @encoding: the document encoding (optional)
13754 * @options: a combination of xmlParserOption
13755 *
13756 * Convenience function to parse an XML document from a
13757 * zero-terminated string.
13758 *
13759 * See xmlCtxtReadDoc for details.
13760 *
13761 * Returns the resulting document tree
13762 */
13763 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)13764 xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13765 int options)
13766 {
13767 xmlParserCtxtPtr ctxt;
13768 xmlParserInputPtr input;
13769 xmlDocPtr doc;
13770
13771 ctxt = xmlNewParserCtxt();
13772 if (ctxt == NULL)
13773 return(NULL);
13774
13775 xmlCtxtUseOptions(ctxt, options);
13776
13777 input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding,
13778 XML_INPUT_BUF_STATIC);
13779
13780 doc = xmlCtxtParseDocument(ctxt, input);
13781
13782 xmlFreeParserCtxt(ctxt);
13783 return(doc);
13784 }
13785
13786 /**
13787 * xmlReadFile:
13788 * @filename: a file or URL
13789 * @encoding: the document encoding (optional)
13790 * @options: a combination of xmlParserOption
13791 *
13792 * Convenience function to parse an XML file from the filesystem,
13793 * the network or a global user-define resource loader.
13794 *
13795 * See xmlCtxtReadFile for details.
13796 *
13797 * Returns the resulting document tree
13798 */
13799 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)13800 xmlReadFile(const char *filename, const char *encoding, int options)
13801 {
13802 xmlParserCtxtPtr ctxt;
13803 xmlParserInputPtr input;
13804 xmlDocPtr doc;
13805
13806 ctxt = xmlNewParserCtxt();
13807 if (ctxt == NULL)
13808 return(NULL);
13809
13810 xmlCtxtUseOptions(ctxt, options);
13811
13812 input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13813
13814 doc = xmlCtxtParseDocument(ctxt, input);
13815
13816 xmlFreeParserCtxt(ctxt);
13817 return(doc);
13818 }
13819
13820 /**
13821 * xmlReadMemory:
13822 * @buffer: a pointer to a char array
13823 * @size: the size of the array
13824 * @url: base URL (optional)
13825 * @encoding: the document encoding (optional)
13826 * @options: a combination of xmlParserOption
13827 *
13828 * Parse an XML in-memory document and build a tree. The input buffer must
13829 * not contain a terminating null byte.
13830 *
13831 * See xmlCtxtReadMemory for details.
13832 *
13833 * Returns the resulting document tree
13834 */
13835 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * url,const char * encoding,int options)13836 xmlReadMemory(const char *buffer, int size, const char *url,
13837 const char *encoding, int options)
13838 {
13839 xmlParserCtxtPtr ctxt;
13840 xmlParserInputPtr input;
13841 xmlDocPtr doc;
13842
13843 if (size < 0)
13844 return(NULL);
13845
13846 ctxt = xmlNewParserCtxt();
13847 if (ctxt == NULL)
13848 return(NULL);
13849
13850 xmlCtxtUseOptions(ctxt, options);
13851
13852 input = xmlNewInputMemory(ctxt, url, buffer, size, encoding,
13853 XML_INPUT_BUF_STATIC);
13854
13855 doc = xmlCtxtParseDocument(ctxt, input);
13856
13857 xmlFreeParserCtxt(ctxt);
13858 return(doc);
13859 }
13860
13861 /**
13862 * xmlReadFd:
13863 * @fd: an open file descriptor
13864 * @URL: base URL (optional)
13865 * @encoding: the document encoding (optional)
13866 * @options: a combination of xmlParserOption
13867 *
13868 * Parse an XML from a file descriptor and build a tree.
13869 *
13870 * See xmlCtxtReadFd for details.
13871 *
13872 * NOTE that the file descriptor will not be closed when the
13873 * context is freed or reset.
13874 *
13875 * Returns the resulting document tree
13876 */
13877 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)13878 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13879 {
13880 xmlParserCtxtPtr ctxt;
13881 xmlParserInputPtr input;
13882 xmlDocPtr doc;
13883
13884 ctxt = xmlNewParserCtxt();
13885 if (ctxt == NULL)
13886 return(NULL);
13887
13888 xmlCtxtUseOptions(ctxt, options);
13889
13890 input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
13891 input->buf->closecallback = NULL;
13892
13893 doc = xmlCtxtParseDocument(ctxt, input);
13894
13895 xmlFreeParserCtxt(ctxt);
13896 return(doc);
13897 }
13898
13899 /**
13900 * xmlReadIO:
13901 * @ioread: an I/O read function
13902 * @ioclose: an I/O close function (optional)
13903 * @ioctx: an I/O handler
13904 * @URL: base URL (optional)
13905 * @encoding: the document encoding (optional)
13906 * @options: a combination of xmlParserOption
13907 *
13908 * Parse an XML document from I/O functions and context and build a tree.
13909 *
13910 * See xmlCtxtReadIO for details.
13911 *
13912 * Returns the resulting document tree
13913 */
13914 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)13915 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13916 void *ioctx, const char *URL, const char *encoding, int options)
13917 {
13918 xmlParserCtxtPtr ctxt;
13919 xmlParserInputPtr input;
13920 xmlDocPtr doc;
13921
13922 ctxt = xmlNewParserCtxt();
13923 if (ctxt == NULL)
13924 return(NULL);
13925
13926 xmlCtxtUseOptions(ctxt, options);
13927
13928 input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
13929
13930 doc = xmlCtxtParseDocument(ctxt, input);
13931
13932 xmlFreeParserCtxt(ctxt);
13933 return(doc);
13934 }
13935
13936 /**
13937 * xmlCtxtReadDoc:
13938 * @ctxt: an XML parser context
13939 * @str: a pointer to a zero terminated string
13940 * @URL: base URL (optional)
13941 * @encoding: the document encoding (optional)
13942 * @options: a combination of xmlParserOption
13943 *
13944 * Parse an XML in-memory document and build a tree.
13945 *
13946 * @URL is used as base to resolve external entities and for error
13947 * reporting.
13948 *
13949 * See xmlCtxtUseOptions for details.
13950 *
13951 * Returns the resulting document tree
13952 */
13953 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * URL,const char * encoding,int options)13954 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13955 const char *URL, const char *encoding, int options)
13956 {
13957 xmlParserInputPtr input;
13958
13959 if (ctxt == NULL)
13960 return(NULL);
13961
13962 xmlCtxtReset(ctxt);
13963 xmlCtxtUseOptions(ctxt, options);
13964
13965 input = xmlNewInputString(ctxt, URL, (const char *) str, encoding,
13966 XML_INPUT_BUF_STATIC);
13967
13968 return(xmlCtxtParseDocument(ctxt, input));
13969 }
13970
13971 /**
13972 * xmlCtxtReadFile:
13973 * @ctxt: an XML parser context
13974 * @filename: a file or URL
13975 * @encoding: the document encoding (optional)
13976 * @options: a combination of xmlParserOption
13977 *
13978 * Parse an XML file from the filesystem, the network or a user-defined
13979 * resource loader.
13980 *
13981 * See xmlNewInputURL and xmlCtxtUseOptions for details.
13982 *
13983 * Returns the resulting document tree
13984 */
13985 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)13986 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13987 const char *encoding, int options)
13988 {
13989 xmlParserInputPtr input;
13990
13991 if (ctxt == NULL)
13992 return(NULL);
13993
13994 xmlCtxtReset(ctxt);
13995 xmlCtxtUseOptions(ctxt, options);
13996
13997 input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13998
13999 return(xmlCtxtParseDocument(ctxt, input));
14000 }
14001
14002 /**
14003 * xmlCtxtReadMemory:
14004 * @ctxt: an XML parser context
14005 * @buffer: a pointer to a char array
14006 * @size: the size of the array
14007 * @URL: base URL (optional)
14008 * @encoding: the document encoding (optional)
14009 * @options: a combination of xmlParserOption
14010 *
14011 * Parse an XML in-memory document and build a tree. The input buffer must
14012 * not contain a terminating null byte.
14013 *
14014 * @URL is used as base to resolve external entities and for error
14015 * reporting.
14016 *
14017 * See xmlCtxtUseOptions for details.
14018 *
14019 * Returns the resulting document tree
14020 */
14021 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14022 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14023 const char *URL, const char *encoding, int options)
14024 {
14025 xmlParserInputPtr input;
14026
14027 if ((ctxt == NULL) || (size < 0))
14028 return(NULL);
14029
14030 xmlCtxtReset(ctxt);
14031 xmlCtxtUseOptions(ctxt, options);
14032
14033 input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding,
14034 XML_INPUT_BUF_STATIC);
14035
14036 return(xmlCtxtParseDocument(ctxt, input));
14037 }
14038
14039 /**
14040 * xmlCtxtReadFd:
14041 * @ctxt: an XML parser context
14042 * @fd: an open file descriptor
14043 * @URL: base URL (optional)
14044 * @encoding: the document encoding (optional)
14045 * @options: a combination of xmlParserOption
14046 *
14047 * Parse an XML document from a file descriptor and build a tree.
14048 *
14049 * NOTE that the file descriptor will not be closed when the
14050 * context is freed or reset.
14051 *
14052 * @URL is used as base to resolve external entities and for error
14053 * reporting.
14054 *
14055 * See xmlCtxtUseOptions for details.
14056 *
14057 * Returns the resulting document tree
14058 */
14059 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14060 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14061 const char *URL, const char *encoding, int options)
14062 {
14063 xmlParserInputPtr input;
14064
14065 if (ctxt == NULL)
14066 return(NULL);
14067
14068 xmlCtxtReset(ctxt);
14069 xmlCtxtUseOptions(ctxt, options);
14070
14071 input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14072 input->buf->closecallback = NULL;
14073
14074 return(xmlCtxtParseDocument(ctxt, input));
14075 }
14076
14077 /**
14078 * xmlCtxtReadIO:
14079 * @ctxt: an XML parser context
14080 * @ioread: an I/O read function
14081 * @ioclose: an I/O close function
14082 * @ioctx: an I/O handler
14083 * @URL: the base URL to use for the document
14084 * @encoding: the document encoding, or NULL
14085 * @options: a combination of xmlParserOption
14086 *
14087 * parse an XML document from I/O functions and source and build a tree.
14088 * This reuses the existing @ctxt parser context
14089 *
14090 * @URL is used as base to resolve external entities and for error
14091 * reporting.
14092 *
14093 * See xmlCtxtUseOptions for details.
14094 *
14095 * Returns the resulting document tree
14096 */
14097 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14098 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14099 xmlInputCloseCallback ioclose, void *ioctx,
14100 const char *URL,
14101 const char *encoding, int options)
14102 {
14103 xmlParserInputPtr input;
14104
14105 if (ctxt == NULL)
14106 return(NULL);
14107
14108 xmlCtxtReset(ctxt);
14109 xmlCtxtUseOptions(ctxt, options);
14110
14111 input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
14112
14113 return(xmlCtxtParseDocument(ctxt, input));
14114 }
14115
14116