xref: /aosp_15_r20/external/cronet/third_party/libxml/src/parser.c (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implemented either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/parser.h>
55 #include <libxml/xmlmemory.h>
56 #include <libxml/tree.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #include <libxml/SAX2.h>
65 #ifdef LIBXML_CATALOG_ENABLED
66 #include <libxml/catalog.h>
67 #endif
68 
69 #include "private/buf.h"
70 #include "private/dict.h"
71 #include "private/entities.h"
72 #include "private/error.h"
73 #include "private/html.h"
74 #include "private/io.h"
75 #include "private/parser.h"
76 
77 #define NS_INDEX_EMPTY  INT_MAX
78 #define NS_INDEX_XML    (INT_MAX - 1)
79 #define URI_HASH_EMPTY  0xD943A04E
80 #define URI_HASH_XML    0xF0451F02
81 
82 struct _xmlStartTag {
83     const xmlChar *prefix;
84     const xmlChar *URI;
85     int line;
86     int nsNr;
87 };
88 
/*
 * Auxiliary per-namespace record. struct _xmlParserNsData holds a pointer
 * to these in its "extra" member.
 * NOTE(review): the exact meaning of each member is not visible here;
 * the names suggest SAX user data, cached hash values of prefix and URI,
 * the id of the declaring element and a previous index. Confirm.
 */
typedef struct {
    void *saxData;
    unsigned prefixHashValue;
    unsigned uriHashValue;
    unsigned elementId;
    int oldIndex;
} xmlParserNsExtra;
96 
/*
 * One slot of the namespace hash table (see the "hash" member of
 * struct _xmlParserNsData): a hash value paired with an index.
 */
typedef struct {
    unsigned hashValue;
    int index;
} xmlParserNsBucket;
101 
102 struct _xmlParserNsData {
103     xmlParserNsExtra *extra;
104 
105     unsigned hashSize;
106     unsigned hashElems;
107     xmlParserNsBucket *hash;
108 
109     unsigned elementId;
110     int defaultNsIndex;
111     int minNsIndex;
112 };
113 
/*
 * One slot of an attribute hash table; it stores nothing but an index.
 */
struct _xmlAttrHashBucket {
    int index;
};
117 
118 static int
119 xmlParseElementStart(xmlParserCtxtPtr ctxt);
120 
121 static void
122 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123 
124 static xmlEntityPtr
125 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126 
127 static const xmlChar *
128 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129 
130 /************************************************************************
131  *									*
132  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
133  *									*
134  ************************************************************************/
135 
136 #define XML_PARSER_BIG_ENTITY 1000
137 #define XML_PARSER_LOT_ENTITY 5000
138 
139 /*
140  * Constants for protection against abusive entity expansion
141  * ("billion laughs").
142  */
143 
144 /*
145  * A certain amount of entity expansion which is always allowed.
146  */
147 #define XML_PARSER_ALLOWED_EXPANSION 1000000
148 
149 /*
150  * Fixed cost for each entity reference. This crudely models processing time
151  * as well to protect, for example, against exponential expansion of empty
152  * or very short entities.
153  */
154 #define XML_ENT_FIXED_COST 20
155 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the nesting depth of the XML documents we agree
 * to process. It is a safety boundary, not a limitation of the parser,
 * and can be disabled with the XML_PARSE_HUGE parser option.
 */
const unsigned int xmlParserMaxDepth = 256u;
165 
166 
167 
168 #define XML_PARSER_BIG_BUFFER_SIZE 300
169 #define XML_PARSER_BUFFER_SIZE 100
170 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171 
172 /**
173  * XML_PARSER_CHUNK_SIZE
174  *
175  * When calling GROW that's the minimal amount of data
176  * the parser expected to have received. It is not a hard
177  * limit but an optimization when reading strings like Names
178  * It is not strictly needed as long as inputs available characters
179  * are followed by 0, which should be provided by the I/O level
180  */
181 #define XML_PARSER_CHUNK_SIZE 100
182 
183 /**
184  * xmlParserVersion:
185  *
186  * Constant string describing the internal version of the library
187  */
188 const char *const
189 xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190 
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets that are allowed by W3C specs.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL    /* end of list */
};
200 
201 
202 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204                                               const xmlChar **str);
205 
206 static void
207 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208 
209 static int
210 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211 
212 /************************************************************************
213  *									*
214  *		Some factorized error routines				*
215  *									*
216  ************************************************************************/
217 
218 static void
xmlErrMemory(xmlParserCtxtPtr ctxt)219 xmlErrMemory(xmlParserCtxtPtr ctxt) {
220     xmlCtxtErrMemory(ctxt);
221 }
222 
223 /**
224  * xmlErrAttributeDup:
225  * @ctxt:  an XML parser context
226  * @prefix:  the attribute prefix
227  * @localname:  the attribute localname
228  *
229  * Handle a redefinition of attribute error
230  */
231 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)232 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233                    const xmlChar * localname)
234 {
235     if (prefix == NULL)
236         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237                    XML_ERR_FATAL, localname, NULL, NULL, 0,
238                    "Attribute %s redefined\n", localname);
239     else
240         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241                    XML_ERR_FATAL, prefix, localname, NULL, 0,
242                    "Attribute %s:%s redefined\n", prefix, localname);
243 }
244 
245 /**
246  * xmlFatalErrMsg:
247  * @ctxt:  an XML parser context
248  * @error:  the error number
249  * @msg:  the error message
250  *
251  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
252  */
253 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)254 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255                const char *msg)
256 {
257     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258                NULL, NULL, NULL, 0, "%s", msg);
259 }
260 
261 /**
262  * xmlWarningMsg:
263  * @ctxt:  an XML parser context
264  * @error:  the error number
265  * @msg:  the error message
266  * @str1:  extra data
267  * @str2:  extra data
268  *
269  * Handle a warning.
270  */
271 void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)272 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273               const char *msg, const xmlChar *str1, const xmlChar *str2)
274 {
275     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276                str1, str2, NULL, 0, msg, str1, str2);
277 }
278 
279 /**
280  * xmlValidityError:
281  * @ctxt:  an XML parser context
282  * @error:  the error number
283  * @msg:  the error message
284  * @str1:  extra data
285  *
286  * Handle a validity error.
287  */
288 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)289 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290               const char *msg, const xmlChar *str1, const xmlChar *str2)
291 {
292     ctxt->valid = 0;
293 
294     xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295                str1, str2, NULL, 0, msg, str1, str2);
296 }
297 
298 /**
299  * xmlFatalErrMsgInt:
300  * @ctxt:  an XML parser context
301  * @error:  the error number
302  * @msg:  the error message
303  * @val:  an integer value
304  *
305  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
306  */
307 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)308 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309                   const char *msg, int val)
310 {
311     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312                NULL, NULL, NULL, val, msg, val);
313 }
314 
315 /**
316  * xmlFatalErrMsgStrIntStr:
317  * @ctxt:  an XML parser context
318  * @error:  the error number
319  * @msg:  the error message
320  * @str1:  an string info
321  * @val:  an integer value
322  * @str2:  an string info
323  *
324  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
325  */
326 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)327 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328                   const char *msg, const xmlChar *str1, int val,
329 		  const xmlChar *str2)
330 {
331     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332                str1, str2, NULL, val, msg, str1, val, str2);
333 }
334 
335 /**
336  * xmlFatalErrMsgStr:
337  * @ctxt:  an XML parser context
338  * @error:  the error number
339  * @msg:  the error message
340  * @val:  a string value
341  *
342  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
343  */
344 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)345 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346                   const char *msg, const xmlChar * val)
347 {
348     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349                val, NULL, NULL, 0, msg, val);
350 }
351 
352 /**
353  * xmlErrMsgStr:
354  * @ctxt:  an XML parser context
355  * @error:  the error number
356  * @msg:  the error message
357  * @val:  a string value
358  *
359  * Handle a non fatal parser error
360  */
361 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)362 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363                   const char *msg, const xmlChar * val)
364 {
365     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366                val, NULL, NULL, 0, msg, val);
367 }
368 
369 /**
370  * xmlNsErr:
371  * @ctxt:  an XML parser context
372  * @error:  the error number
373  * @msg:  the message
374  * @info1:  extra information string
375  * @info2:  extra information string
376  *
377  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378  */
379 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)380 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381          const char *msg,
382          const xmlChar * info1, const xmlChar * info2,
383          const xmlChar * info3)
384 {
385     ctxt->nsWellFormed = 0;
386 
387     xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388                info1, info2, info3, 0, msg, info1, info2, info3);
389 }
390 
391 /**
392  * xmlNsWarn
393  * @ctxt:  an XML parser context
394  * @error:  the error number
395  * @msg:  the message
396  * @info1:  extra information string
397  * @info2:  extra information string
398  *
399  * Handle a namespace warning error
400  */
401 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)402 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403          const char *msg,
404          const xmlChar * info1, const xmlChar * info2,
405          const xmlChar * info3)
406 {
407     xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408                info1, info2, info3, 0, msg, info1, info2, info3);
409 }
410 
/**
 * xmlSaturatedAdd:
 * @dst:  the accumulator, updated in place
 * @val:  the amount to add
 *
 * Add @val to *@dst without wrapping around: if the sum would exceed
 * ULONG_MAX, *@dst is clamped to ULONG_MAX instead.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* How much can still be added before the counter overflows. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
418 
/**
 * xmlSaturatedAddSizeT:
 * @dst:  the accumulator, updated in place
 * @val:  the amount to add, as a size_t
 *
 * Add @val to *@dst without wrapping around: if the sum would exceed
 * ULONG_MAX, *@dst is clamped to ULONG_MAX instead.
 *
 * @val is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long (e.g. 64-bit Windows), a large value saturates the
 * counter instead of being silently truncated before the check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is carried out in the wider of the two types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
426 
427 /**
428  * xmlParserEntityCheck:
429  * @ctxt:  parser context
430  * @extra:  sum of unexpanded entity sizes
431  *
432  * Check for non-linear entity expansion behaviour.
433  *
434  * In some cases like xmlExpandEntityInAttValue, this function is called
435  * for each, possibly nested entity and its unexpanded content length.
436  *
437  * In other cases like xmlParseReference, it's only called for each
438  * top-level entity with its unexpanded content length plus the sum of
439  * the unexpanded content lengths (plus fixed cost) of all nested
440  * entities.
441  *
442  * Summing the unexpanded lengths also adds the length of the reference.
443  * This is by design. Taking the length of the entity name into account
444  * discourages attacks that try to waste CPU time with abusively long
445  * entity names. See test/recurse/lol6.xml for example. Each call also
446  * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447  * short entities.
448  *
449  * Returns 1 on error, 0 on success.
450  */
451 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long extra)452 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453 {
454     unsigned long consumed;
455     unsigned long *expandedSize;
456     xmlParserInputPtr input = ctxt->input;
457     xmlEntityPtr entity = input->entity;
458 
459     if ((entity) && (entity->flags & XML_ENT_CHECKED))
460         return(0);
461 
462     /*
463      * Compute total consumed bytes so far, including input streams of
464      * external entities.
465      */
466     consumed = input->consumed;
467     xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468     xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469 
470     if (entity)
471         expandedSize = &entity->expandedSize;
472     else
473         expandedSize = &ctxt->sizeentcopy;
474 
475     /*
476      * Add extra cost and some fixed cost.
477      */
478     xmlSaturatedAdd(expandedSize, extra);
479     xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480 
481     /*
482      * It's important to always use saturation arithmetic when tracking
483      * entity sizes to make the size checks reliable. If "sizeentcopy"
484      * overflows, we have to abort.
485      */
486     if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487         ((*expandedSize >= ULONG_MAX) ||
488          (*expandedSize / ctxt->maxAmpl > consumed))) {
489         xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490                        "Maximum entity amplification factor exceeded, see "
491                        "xmlCtxtSetMaxAmplification.\n");
492         xmlHaltParser(ctxt);
493         return(1);
494     }
495 
496     return(0);
497 }
498 
499 /************************************************************************
500  *									*
501  *		Library wide options					*
502  *									*
503  ************************************************************************/
504 
505 /**
506   * xmlHasFeature:
507   * @feature: the feature to be examined
508   *
509   * Examines if the library has been compiled with a given feature.
510   *
511   * Returns a non-zero value if the feature exist, otherwise zero.
512   * Returns zero (0) if the feature does not exist or an unknown
513   * unknown feature is requested, non-zero otherwise.
514   */
515 int
xmlHasFeature(xmlFeature feature)516 xmlHasFeature(xmlFeature feature)
517 {
518     switch (feature) {
519 	case XML_WITH_THREAD:
520 #ifdef LIBXML_THREAD_ENABLED
521 	    return(1);
522 #else
523 	    return(0);
524 #endif
525         case XML_WITH_TREE:
526 #ifdef LIBXML_TREE_ENABLED
527             return(1);
528 #else
529             return(0);
530 #endif
531         case XML_WITH_OUTPUT:
532 #ifdef LIBXML_OUTPUT_ENABLED
533             return(1);
534 #else
535             return(0);
536 #endif
537         case XML_WITH_PUSH:
538 #ifdef LIBXML_PUSH_ENABLED
539             return(1);
540 #else
541             return(0);
542 #endif
543         case XML_WITH_READER:
544 #ifdef LIBXML_READER_ENABLED
545             return(1);
546 #else
547             return(0);
548 #endif
549         case XML_WITH_PATTERN:
550 #ifdef LIBXML_PATTERN_ENABLED
551             return(1);
552 #else
553             return(0);
554 #endif
555         case XML_WITH_WRITER:
556 #ifdef LIBXML_WRITER_ENABLED
557             return(1);
558 #else
559             return(0);
560 #endif
561         case XML_WITH_SAX1:
562 #ifdef LIBXML_SAX1_ENABLED
563             return(1);
564 #else
565             return(0);
566 #endif
567         case XML_WITH_FTP:
568 #ifdef LIBXML_FTP_ENABLED
569             return(1);
570 #else
571             return(0);
572 #endif
573         case XML_WITH_HTTP:
574 #ifdef LIBXML_HTTP_ENABLED
575             return(1);
576 #else
577             return(0);
578 #endif
579         case XML_WITH_VALID:
580 #ifdef LIBXML_VALID_ENABLED
581             return(1);
582 #else
583             return(0);
584 #endif
585         case XML_WITH_HTML:
586 #ifdef LIBXML_HTML_ENABLED
587             return(1);
588 #else
589             return(0);
590 #endif
591         case XML_WITH_LEGACY:
592 #ifdef LIBXML_LEGACY_ENABLED
593             return(1);
594 #else
595             return(0);
596 #endif
597         case XML_WITH_C14N:
598 #ifdef LIBXML_C14N_ENABLED
599             return(1);
600 #else
601             return(0);
602 #endif
603         case XML_WITH_CATALOG:
604 #ifdef LIBXML_CATALOG_ENABLED
605             return(1);
606 #else
607             return(0);
608 #endif
609         case XML_WITH_XPATH:
610 #ifdef LIBXML_XPATH_ENABLED
611             return(1);
612 #else
613             return(0);
614 #endif
615         case XML_WITH_XPTR:
616 #ifdef LIBXML_XPTR_ENABLED
617             return(1);
618 #else
619             return(0);
620 #endif
621         case XML_WITH_XINCLUDE:
622 #ifdef LIBXML_XINCLUDE_ENABLED
623             return(1);
624 #else
625             return(0);
626 #endif
627         case XML_WITH_ICONV:
628 #ifdef LIBXML_ICONV_ENABLED
629             return(1);
630 #else
631             return(0);
632 #endif
633         case XML_WITH_ISO8859X:
634 #ifdef LIBXML_ISO8859X_ENABLED
635             return(1);
636 #else
637             return(0);
638 #endif
639         case XML_WITH_UNICODE:
640 #ifdef LIBXML_UNICODE_ENABLED
641             return(1);
642 #else
643             return(0);
644 #endif
645         case XML_WITH_REGEXP:
646 #ifdef LIBXML_REGEXP_ENABLED
647             return(1);
648 #else
649             return(0);
650 #endif
651         case XML_WITH_AUTOMATA:
652 #ifdef LIBXML_AUTOMATA_ENABLED
653             return(1);
654 #else
655             return(0);
656 #endif
657         case XML_WITH_EXPR:
658 #ifdef LIBXML_EXPR_ENABLED
659             return(1);
660 #else
661             return(0);
662 #endif
663         case XML_WITH_SCHEMAS:
664 #ifdef LIBXML_SCHEMAS_ENABLED
665             return(1);
666 #else
667             return(0);
668 #endif
669         case XML_WITH_SCHEMATRON:
670 #ifdef LIBXML_SCHEMATRON_ENABLED
671             return(1);
672 #else
673             return(0);
674 #endif
675         case XML_WITH_MODULES:
676 #ifdef LIBXML_MODULES_ENABLED
677             return(1);
678 #else
679             return(0);
680 #endif
681         case XML_WITH_DEBUG:
682 #ifdef LIBXML_DEBUG_ENABLED
683             return(1);
684 #else
685             return(0);
686 #endif
687         case XML_WITH_DEBUG_MEM:
688 #ifdef DEBUG_MEMORY_LOCATION
689             return(1);
690 #else
691             return(0);
692 #endif
693         case XML_WITH_ZLIB:
694 #ifdef LIBXML_ZLIB_ENABLED
695             return(1);
696 #else
697             return(0);
698 #endif
699         case XML_WITH_LZMA:
700 #ifdef LIBXML_LZMA_ENABLED
701             return(1);
702 #else
703             return(0);
704 #endif
705         case XML_WITH_ICU:
706 #ifdef LIBXML_ICU_ENABLED
707             return(1);
708 #else
709             return(0);
710 #endif
711         default:
712 	    break;
713      }
714      return(0);
715 }
716 
717 /************************************************************************
718  *									*
719  *			Simple string buffer				*
720  *									*
721  ************************************************************************/
722 
723 typedef struct {
724     xmlChar *mem;
725     unsigned size;
726     unsigned cap; /* size < cap */
727     unsigned max; /* size <= max */
728     xmlParserErrors code;
729 } xmlSBuf;
730 
731 static void
xmlSBufInit(xmlSBuf * buf,unsigned max)732 xmlSBufInit(xmlSBuf *buf, unsigned max) {
733     buf->mem = NULL;
734     buf->size = 0;
735     buf->cap = 0;
736     buf->max = max;
737     buf->code = XML_ERR_OK;
738 }
739 
740 static int
xmlSBufGrow(xmlSBuf * buf,unsigned len)741 xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742     xmlChar *mem;
743     unsigned cap;
744 
745     if (len >= UINT_MAX / 2 - buf->size) {
746         buf->code = XML_ERR_RESOURCE_LIMIT;
747         return(-1);
748     }
749 
750     cap = (buf->size + len) * 2;
751     if (cap < 240)
752         cap = 240;
753 
754     mem = xmlRealloc(buf->mem, cap);
755     if (mem == NULL) {
756         buf->code = XML_ERR_NO_MEMORY;
757         return(-1);
758     }
759 
760     buf->mem = mem;
761     buf->cap = cap;
762 
763     return(0);
764 }
765 
766 static void
xmlSBufAddString(xmlSBuf * buf,const xmlChar * str,unsigned len)767 xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
768     if (buf->max - buf->size < len) {
769         buf->code = XML_ERR_RESOURCE_LIMIT;
770         return;
771     }
772 
773     if (buf->cap - buf->size <= len) {
774         if (xmlSBufGrow(buf, len) < 0)
775             return;
776     }
777 
778     if (len > 0)
779         memcpy(buf->mem + buf->size, str, len);
780     buf->size += len;
781 }
782 
783 static void
xmlSBufAddCString(xmlSBuf * buf,const char * str,unsigned len)784 xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
785     xmlSBufAddString(buf, (const xmlChar *) str, len);
786 }
787 
788 static void
xmlSBufAddChar(xmlSBuf * buf,int c)789 xmlSBufAddChar(xmlSBuf *buf, int c) {
790     xmlChar *end;
791 
792     if (buf->max - buf->size < 4) {
793         buf->code = XML_ERR_RESOURCE_LIMIT;
794         return;
795     }
796 
797     if (buf->cap - buf->size <= 4) {
798         if (xmlSBufGrow(buf, 4) < 0)
799             return;
800     }
801 
802     end = buf->mem + buf->size;
803 
804     if (c < 0x80) {
805         *end = (xmlChar) c;
806         buf->size += 1;
807     } else {
808         buf->size += xmlCopyCharMultiByte(end, c);
809     }
810 }
811 
812 static void
xmlSBufAddReplChar(xmlSBuf * buf)813 xmlSBufAddReplChar(xmlSBuf *buf) {
814     xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
815 }
816 
817 static void
xmlSBufReportError(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)818 xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
819     if (buf->code == XML_ERR_NO_MEMORY)
820         xmlCtxtErrMemory(ctxt);
821     else
822         xmlFatalErr(ctxt, buf->code, errMsg);
823 }
824 
825 static xmlChar *
xmlSBufFinish(xmlSBuf * buf,int * sizeOut,xmlParserCtxtPtr ctxt,const char * errMsg)826 xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
827               const char *errMsg) {
828     if (buf->mem == NULL) {
829         buf->mem = xmlMalloc(1);
830         if (buf->mem == NULL) {
831             buf->code = XML_ERR_NO_MEMORY;
832         } else {
833             buf->mem[0] = 0;
834         }
835     } else {
836         buf->mem[buf->size] = 0;
837     }
838 
839     if (buf->code == XML_ERR_OK) {
840         if (sizeOut != NULL)
841             *sizeOut = buf->size;
842         return(buf->mem);
843     }
844 
845     xmlSBufReportError(buf, ctxt, errMsg);
846 
847     xmlFree(buf->mem);
848 
849     if (sizeOut != NULL)
850         *sizeOut = 0;
851     return(NULL);
852 }
853 
854 static void
xmlSBufCleanup(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)855 xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
856     if (buf->code != XML_ERR_OK)
857         xmlSBufReportError(buf, ctxt, errMsg);
858 
859     xmlFree(buf->mem);
860 }
861 
862 static int
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * errMsg)863 xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
864                     const char *errMsg) {
865     int c = str[0];
866     int c1 = str[1];
867 
868     if ((c1 & 0xC0) != 0x80)
869         goto encoding_error;
870 
871     if (c < 0xE0) {
872         /* 2-byte sequence */
873         if (c < 0xC2)
874             goto encoding_error;
875 
876         return(2);
877     } else {
878         int c2 = str[2];
879 
880         if ((c2 & 0xC0) != 0x80)
881             goto encoding_error;
882 
883         if (c < 0xF0) {
884             /* 3-byte sequence */
885             if (c == 0xE0) {
886                 /* overlong */
887                 if (c1 < 0xA0)
888                     goto encoding_error;
889             } else if (c == 0xED) {
890                 /* surrogate */
891                 if (c1 >= 0xA0)
892                     goto encoding_error;
893             } else if (c == 0xEF) {
894                 /* U+FFFE and U+FFFF are invalid Chars */
895                 if ((c1 == 0xBF) && (c2 >= 0xBE))
896                     xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
897             }
898 
899             return(3);
900         } else {
901             /* 4-byte sequence */
902             if ((str[3] & 0xC0) != 0x80)
903                 goto encoding_error;
904             if (c == 0xF0) {
905                 /* overlong */
906                 if (c1 < 0x90)
907                     goto encoding_error;
908             } else if (c >= 0xF4) {
909                 /* greater than 0x10FFFF */
910                 if ((c > 0xF4) || (c1 >= 0x90))
911                     goto encoding_error;
912             }
913 
914             return(4);
915         }
916     }
917 
918 encoding_error:
919     /* Only report the first error */
920     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
921         xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
922         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
923     }
924 
925     return(0);
926 }
927 
928 /************************************************************************
929  *									*
930  *		SAX2 defaulted attributes handling			*
931  *									*
932  ************************************************************************/
933 
934 /**
935  * xmlCtxtInitializeLate:
936  * @ctxt:  an XML parser context
937  *
938  * Final initialization of the parser context before starting to parse.
939  *
940  * This accounts for users modifying struct members of parser context
941  * directly.
942  */
943 static void
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt)944 xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
945     xmlSAXHandlerPtr sax;
946 
947     /* Avoid unused variable warning if features are disabled. */
948     (void) sax;
949 
950     /*
951      * Changing the SAX struct directly is still widespread practice
952      * in internal and external code.
953      */
954     if (ctxt == NULL) return;
955     sax = ctxt->sax;
956 #ifdef LIBXML_SAX1_ENABLED
957     /*
958      * Only enable SAX2 if there SAX2 element handlers, except when there
959      * are no element handlers at all.
960      */
961     if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
962         (sax) &&
963         (sax->initialized == XML_SAX2_MAGIC) &&
964         ((sax->startElementNs != NULL) ||
965          (sax->endElementNs != NULL) ||
966          ((sax->startElement == NULL) && (sax->endElement == NULL))))
967         ctxt->sax2 = 1;
968 #else
969     ctxt->sax2 = 1;
970 #endif /* LIBXML_SAX1_ENABLED */
971 
972     /*
973      * Some users replace the dictionary directly in the context struct.
974      * We really need an API function to do that cleanly.
975      */
976     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
977     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
978     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
979     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
980 		(ctxt->str_xml_ns == NULL)) {
981         xmlErrMemory(ctxt);
982     }
983 }
984 
985 typedef struct {
986     xmlHashedString prefix;
987     xmlHashedString name;
988     xmlHashedString value;
989     const xmlChar *valueEnd;
990     int external;
991     int expandedSize;
992 } xmlDefAttr;
993 
994 typedef struct _xmlDefAttrs xmlDefAttrs;
995 typedef xmlDefAttrs *xmlDefAttrsPtr;
996 struct _xmlDefAttrs {
997     int nbAttrs;	/* number of defaulted attributes on that element */
998     int maxAttrs;       /* the size of the array */
999 #if __STDC_VERSION__ >= 199901L
1000     /* Using a C99 flexible array member avoids UBSan errors. */
1001     xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1002 #else
1003     xmlDefAttr attrs[1];
1004 #endif
1005 };
1006 
1007 /**
1008  * xmlAttrNormalizeSpace:
1009  * @src: the source string
1010  * @dst: the target string
1011  *
1012  * Normalize the space in non CDATA attribute values:
1013  * If the attribute type is not CDATA, then the XML processor MUST further
1014  * process the normalized attribute value by discarding any leading and
1015  * trailing space (#x20) characters, and by replacing sequences of space
1016  * (#x20) characters by a single space (#x20) character.
1017  * Note that the size of dst need to be at least src, and if one doesn't need
1018  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1019  * passing src as dst is just fine.
1020  *
1021  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1022  *         is needed.
1023  */
1024 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1025 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1026 {
1027     if ((src == NULL) || (dst == NULL))
1028         return(NULL);
1029 
1030     while (*src == 0x20) src++;
1031     while (*src != 0) {
1032 	if (*src == 0x20) {
1033 	    while (*src == 0x20) src++;
1034 	    if (*src != 0)
1035 		*dst++ = 0x20;
1036 	} else {
1037 	    *dst++ = *src++;
1038 	}
1039     }
1040     *dst = 0;
1041     if (dst == src)
1042        return(NULL);
1043     return(dst);
1044 }
1045 
1046 /**
1047  * xmlAddDefAttrs:
1048  * @ctxt:  an XML parser context
1049  * @fullname:  the element fullname
1050  * @fullattr:  the attribute fullname
1051  * @value:  the attribute value
1052  *
1053  * Add a defaulted attribute for an element
1054  */
1055 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1056 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1057                const xmlChar *fullname,
1058                const xmlChar *fullattr,
1059                const xmlChar *value) {
1060     xmlDefAttrsPtr defaults;
1061     xmlDefAttr *attr;
1062     int len, expandedSize;
1063     xmlHashedString name;
1064     xmlHashedString prefix;
1065     xmlHashedString hvalue;
1066     const xmlChar *localname;
1067 
1068     /*
1069      * Allows to detect attribute redefinitions
1070      */
1071     if (ctxt->attsSpecial != NULL) {
1072         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1073 	    return;
1074     }
1075 
1076     if (ctxt->attsDefault == NULL) {
1077         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1078 	if (ctxt->attsDefault == NULL)
1079 	    goto mem_error;
1080     }
1081 
1082     /*
1083      * split the element name into prefix:localname , the string found
1084      * are within the DTD and then not associated to namespace names.
1085      */
1086     localname = xmlSplitQName3(fullname, &len);
1087     if (localname == NULL) {
1088         name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1089 	prefix.name = NULL;
1090     } else {
1091         name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1092 	prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1093         if (prefix.name == NULL)
1094             goto mem_error;
1095     }
1096     if (name.name == NULL)
1097         goto mem_error;
1098 
1099     /*
1100      * make sure there is some storage
1101      */
1102     defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1103     if ((defaults == NULL) ||
1104         (defaults->nbAttrs >= defaults->maxAttrs)) {
1105         xmlDefAttrsPtr temp;
1106         int newSize;
1107 
1108         newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1109         temp = xmlRealloc(defaults,
1110                           sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111 	if (temp == NULL)
1112 	    goto mem_error;
1113         if (defaults == NULL)
1114             temp->nbAttrs = 0;
1115 	temp->maxAttrs = newSize;
1116         defaults = temp;
1117 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118 	                        defaults, NULL) < 0) {
1119 	    xmlFree(defaults);
1120 	    goto mem_error;
1121 	}
1122     }
1123 
1124     /*
1125      * Split the attribute name into prefix:localname , the string found
1126      * are within the DTD and hen not associated to namespace names.
1127      */
1128     localname = xmlSplitQName3(fullattr, &len);
1129     if (localname == NULL) {
1130         name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131 	prefix.name = NULL;
1132     } else {
1133         name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134 	prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135         if (prefix.name == NULL)
1136             goto mem_error;
1137     }
1138     if (name.name == NULL)
1139         goto mem_error;
1140 
1141     /* intern the string and precompute the end */
1142     len = strlen((const char *) value);
1143     hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144     if (hvalue.name == NULL)
1145         goto mem_error;
1146 
1147     expandedSize = strlen((const char *) name.name);
1148     if (prefix.name != NULL)
1149         expandedSize += strlen((const char *) prefix.name);
1150     expandedSize += len;
1151 
1152     attr = &defaults->attrs[defaults->nbAttrs++];
1153     attr->name = name;
1154     attr->prefix = prefix;
1155     attr->value = hvalue;
1156     attr->valueEnd = hvalue.name + len;
1157     attr->external = PARSER_EXTERNAL(ctxt);
1158     attr->expandedSize = expandedSize;
1159 
1160     return;
1161 
1162 mem_error:
1163     xmlErrMemory(ctxt);
1164     return;
1165 }
1166 
1167 /**
1168  * xmlAddSpecialAttr:
1169  * @ctxt:  an XML parser context
1170  * @fullname:  the element fullname
1171  * @fullattr:  the attribute fullname
1172  * @type:  the attribute type
1173  *
1174  * Register this attribute type
1175  */
1176 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1177 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1178 		  const xmlChar *fullname,
1179 		  const xmlChar *fullattr,
1180 		  int type)
1181 {
1182     if (ctxt->attsSpecial == NULL) {
1183         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1184 	if (ctxt->attsSpecial == NULL)
1185 	    goto mem_error;
1186     }
1187 
1188     if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1189                     (void *) (ptrdiff_t) type) < 0)
1190         goto mem_error;
1191     return;
1192 
1193 mem_error:
1194     xmlErrMemory(ctxt);
1195     return;
1196 }
1197 
1198 /**
1199  * xmlCleanSpecialAttrCallback:
1200  *
1201  * Removes CDATA attributes from the special attribute table
1202  */
1203 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1204 xmlCleanSpecialAttrCallback(void *payload, void *data,
1205                             const xmlChar *fullname, const xmlChar *fullattr,
1206                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1207     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1208 
1209     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1210         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1211     }
1212 }
1213 
1214 /**
1215  * xmlCleanSpecialAttr:
1216  * @ctxt:  an XML parser context
1217  *
1218  * Trim the list of attributes defined to remove all those of type
1219  * CDATA as they are not special. This call should be done when finishing
1220  * to parse the DTD and before starting to parse the document root.
1221  */
1222 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1223 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1224 {
1225     if (ctxt->attsSpecial == NULL)
1226         return;
1227 
1228     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1229 
1230     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1231         xmlHashFree(ctxt->attsSpecial, NULL);
1232         ctxt->attsSpecial = NULL;
1233     }
1234     return;
1235 }
1236 
1237 /**
1238  * xmlCheckLanguageID:
1239  * @lang:  pointer to the string value
1240  *
1241  * DEPRECATED: Internal function, do not use.
1242  *
1243  * Checks that the value conforms to the LanguageID production:
1244  *
1245  * NOTE: this is somewhat deprecated, those productions were removed from
1246  *       the XML Second edition.
1247  *
1248  * [33] LanguageID ::= Langcode ('-' Subcode)*
1249  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1250  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1251  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1252  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1253  * [38] Subcode ::= ([a-z] | [A-Z])+
1254  *
1255  * The current REC reference the successors of RFC 1766, currently 5646
1256  *
1257  * http://www.rfc-editor.org/rfc/rfc5646.txt
1258  * langtag       = language
1259  *                 ["-" script]
1260  *                 ["-" region]
1261  *                 *("-" variant)
1262  *                 *("-" extension)
1263  *                 ["-" privateuse]
1264  * language      = 2*3ALPHA            ; shortest ISO 639 code
1265  *                 ["-" extlang]       ; sometimes followed by
1266  *                                     ; extended language subtags
1267  *               / 4ALPHA              ; or reserved for future use
1268  *               / 5*8ALPHA            ; or registered language subtag
1269  *
1270  * extlang       = 3ALPHA              ; selected ISO 639 codes
1271  *                 *2("-" 3ALPHA)      ; permanently reserved
1272  *
1273  * script        = 4ALPHA              ; ISO 15924 code
1274  *
1275  * region        = 2ALPHA              ; ISO 3166-1 code
1276  *               / 3DIGIT              ; UN M.49 code
1277  *
1278  * variant       = 5*8alphanum         ; registered variants
1279  *               / (DIGIT 3alphanum)
1280  *
1281  * extension     = singleton 1*("-" (2*8alphanum))
1282  *
1283  *                                     ; Single alphanumerics
1284  *                                     ; "x" reserved for private use
1285  * singleton     = DIGIT               ; 0 - 9
1286  *               / %x41-57             ; A - W
1287  *               / %x59-5A             ; Y - Z
1288  *               / %x61-77             ; a - w
1289  *               / %x79-7A             ; y - z
1290  *
1291  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1292  * The parser below doesn't try to cope with extension or privateuse
1293  * that could be added but that's not interoperable anyway
1294  *
1295  * Returns 1 if correct 0 otherwise
1296  **/
1297 int
xmlCheckLanguageID(const xmlChar * lang)1298 xmlCheckLanguageID(const xmlChar * lang)
1299 {
1300     const xmlChar *cur = lang, *nxt;
1301 
1302     if (cur == NULL)
1303         return (0);
1304     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1305         ((cur[0] == 'I') && (cur[1] == '-')) ||
1306         ((cur[0] == 'x') && (cur[1] == '-')) ||
1307         ((cur[0] == 'X') && (cur[1] == '-'))) {
1308         /*
1309          * Still allow IANA code and user code which were coming
1310          * from the previous version of the XML-1.0 specification
1311          * it's deprecated but we should not fail
1312          */
1313         cur += 2;
1314         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1316             cur++;
1317         return(cur[0] == 0);
1318     }
1319     nxt = cur;
1320     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1321            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1322            nxt++;
1323     if (nxt - cur >= 4) {
1324         /*
1325          * Reserved
1326          */
1327         if ((nxt - cur > 8) || (nxt[0] != 0))
1328             return(0);
1329         return(1);
1330     }
1331     if (nxt - cur < 2)
1332         return(0);
1333     /* we got an ISO 639 code */
1334     if (nxt[0] == 0)
1335         return(1);
1336     if (nxt[0] != '-')
1337         return(0);
1338 
1339     nxt++;
1340     cur = nxt;
1341     /* now we can have extlang or script or region or variant */
1342     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1343         goto region_m49;
1344 
1345     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1346            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1347            nxt++;
1348     if (nxt - cur == 4)
1349         goto script;
1350     if (nxt - cur == 2)
1351         goto region;
1352     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1353         goto variant;
1354     if (nxt - cur != 3)
1355         return(0);
1356     /* we parsed an extlang */
1357     if (nxt[0] == 0)
1358         return(1);
1359     if (nxt[0] != '-')
1360         return(0);
1361 
1362     nxt++;
1363     cur = nxt;
1364     /* now we can have script or region or variant */
1365     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1366         goto region_m49;
1367 
1368     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1369            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1370            nxt++;
1371     if (nxt - cur == 2)
1372         goto region;
1373     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1374         goto variant;
1375     if (nxt - cur != 4)
1376         return(0);
1377     /* we parsed a script */
1378 script:
1379     if (nxt[0] == 0)
1380         return(1);
1381     if (nxt[0] != '-')
1382         return(0);
1383 
1384     nxt++;
1385     cur = nxt;
1386     /* now we can have region or variant */
1387     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388         goto region_m49;
1389 
1390     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392            nxt++;
1393 
1394     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1395         goto variant;
1396     if (nxt - cur != 2)
1397         return(0);
1398     /* we parsed a region */
1399 region:
1400     if (nxt[0] == 0)
1401         return(1);
1402     if (nxt[0] != '-')
1403         return(0);
1404 
1405     nxt++;
1406     cur = nxt;
1407     /* now we can just have a variant */
1408     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1409            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1410            nxt++;
1411 
1412     if ((nxt - cur < 5) || (nxt - cur > 8))
1413         return(0);
1414 
1415     /* we parsed a variant */
1416 variant:
1417     if (nxt[0] == 0)
1418         return(1);
1419     if (nxt[0] != '-')
1420         return(0);
1421     /* extensions and private use subtags not checked */
1422     return (1);
1423 
1424 region_m49:
1425     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1426         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1427         nxt += 3;
1428         goto region;
1429     }
1430     return(0);
1431 }
1432 
1433 /************************************************************************
1434  *									*
1435  *		Parser stacks related functions and macros		*
1436  *									*
1437  ************************************************************************/
1438 
1439 static xmlChar *
1440 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1441 
1442 /**
1443  * xmlParserNsCreate:
1444  *
1445  * Create a new namespace database.
1446  *
1447  * Returns the new obejct.
1448  */
1449 xmlParserNsData *
xmlParserNsCreate(void)1450 xmlParserNsCreate(void) {
1451     xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1452 
1453     if (nsdb == NULL)
1454         return(NULL);
1455     memset(nsdb, 0, sizeof(*nsdb));
1456     nsdb->defaultNsIndex = INT_MAX;
1457 
1458     return(nsdb);
1459 }
1460 
1461 /**
1462  * xmlParserNsFree:
1463  * @nsdb: namespace database
1464  *
1465  * Free a namespace database.
1466  */
1467 void
xmlParserNsFree(xmlParserNsData * nsdb)1468 xmlParserNsFree(xmlParserNsData *nsdb) {
1469     if (nsdb == NULL)
1470         return;
1471 
1472     xmlFree(nsdb->extra);
1473     xmlFree(nsdb->hash);
1474     xmlFree(nsdb);
1475 }
1476 
1477 /**
1478  * xmlParserNsReset:
1479  * @nsdb: namespace database
1480  *
1481  * Reset a namespace database.
1482  */
1483 static void
xmlParserNsReset(xmlParserNsData * nsdb)1484 xmlParserNsReset(xmlParserNsData *nsdb) {
1485     if (nsdb == NULL)
1486         return;
1487 
1488     nsdb->hashElems = 0;
1489     nsdb->elementId = 0;
1490     nsdb->defaultNsIndex = INT_MAX;
1491 
1492     if (nsdb->hash)
1493         memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1494 }
1495 
1496 /**
1497  * xmlParserStartElement:
1498  * @nsdb: namespace database
1499  *
1500  * Signal that a new element has started.
1501  *
1502  * Returns 0 on success, -1 if the element counter overflowed.
1503  */
1504 static int
xmlParserNsStartElement(xmlParserNsData * nsdb)1505 xmlParserNsStartElement(xmlParserNsData *nsdb) {
1506     if (nsdb->elementId == UINT_MAX)
1507         return(-1);
1508     nsdb->elementId++;
1509 
1510     return(0);
1511 }
1512 
1513 /**
1514  * xmlParserNsLookup:
1515  * @ctxt: parser context
1516  * @prefix: namespace prefix
1517  * @bucketPtr: optional bucket (return value)
1518  *
1519  * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1520  * be set to the matching bucket, or the first empty bucket if no match
1521  * was found.
1522  *
1523  * Returns the namespace index on success, INT_MAX if no namespace was
1524  * found.
1525  */
1526 static int
xmlParserNsLookup(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,xmlParserNsBucket ** bucketPtr)1527 xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1528                   xmlParserNsBucket **bucketPtr) {
1529     xmlParserNsBucket *bucket;
1530     unsigned index, hashValue;
1531 
1532     if (prefix->name == NULL)
1533         return(ctxt->nsdb->defaultNsIndex);
1534 
1535     if (ctxt->nsdb->hashSize == 0)
1536         return(INT_MAX);
1537 
1538     hashValue = prefix->hashValue;
1539     index = hashValue & (ctxt->nsdb->hashSize - 1);
1540     bucket = &ctxt->nsdb->hash[index];
1541 
1542     while (bucket->hashValue) {
1543         if ((bucket->hashValue == hashValue) &&
1544             (bucket->index != INT_MAX)) {
1545             if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546                 if (bucketPtr != NULL)
1547                     *bucketPtr = bucket;
1548                 return(bucket->index);
1549             }
1550         }
1551 
1552         index++;
1553         bucket++;
1554         if (index == ctxt->nsdb->hashSize) {
1555             index = 0;
1556             bucket = ctxt->nsdb->hash;
1557         }
1558     }
1559 
1560     if (bucketPtr != NULL)
1561         *bucketPtr = bucket;
1562     return(INT_MAX);
1563 }
1564 
1565 /**
1566  * xmlParserNsLookupUri:
1567  * @ctxt: parser context
1568  * @prefix: namespace prefix
1569  *
1570  * Lookup namespace URI with given prefix.
1571  *
1572  * Returns the namespace URI on success, NULL if no namespace was found.
1573  */
1574 static const xmlChar *
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix)1575 xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576     const xmlChar *ret;
1577     int nsIndex;
1578 
1579     if (prefix->name == ctxt->str_xml)
1580         return(ctxt->str_xml_ns);
1581 
1582     /*
1583      * minNsIndex is used when building an entity tree. We must
1584      * ignore namespaces declared outside the entity.
1585      */
1586     nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587     if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588         return(NULL);
1589 
1590     ret = ctxt->nsTab[nsIndex * 2 + 1];
1591     if (ret[0] == 0)
1592         ret = NULL;
1593     return(ret);
1594 }
1595 
1596 /**
1597  * xmlParserNsLookupSax:
1598  * @ctxt: parser context
1599  * @prefix: namespace prefix
1600  *
1601  * Lookup extra data for the given prefix. This returns data stored
1602  * with xmlParserNsUdpateSax.
1603  *
1604  * Returns the data on success, NULL if no namespace was found.
1605  */
1606 void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix)1607 xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608     xmlHashedString hprefix;
1609     int nsIndex;
1610 
1611     if (prefix == ctxt->str_xml)
1612         return(NULL);
1613 
1614     hprefix.name = prefix;
1615     if (prefix != NULL)
1616         hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617     else
1618         hprefix.hashValue = 0;
1619     nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620     if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621         return(NULL);
1622 
1623     return(ctxt->nsdb->extra[nsIndex].saxData);
1624 }
1625 
1626 /**
1627  * xmlParserNsUpdateSax:
1628  * @ctxt: parser context
1629  * @prefix: namespace prefix
1630  * @saxData: extra data for SAX handler
1631  *
1632  * Sets or updates extra data for the given prefix. This value will be
1633  * returned by xmlParserNsLookupSax as long as the namespace with the
1634  * given prefix is in scope.
1635  *
1636  * Returns the data on success, NULL if no namespace was found.
1637  */
1638 int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix,void * saxData)1639 xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640                      void *saxData) {
1641     xmlHashedString hprefix;
1642     int nsIndex;
1643 
1644     if (prefix == ctxt->str_xml)
1645         return(-1);
1646 
1647     hprefix.name = prefix;
1648     if (prefix != NULL)
1649         hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650     else
1651         hprefix.hashValue = 0;
1652     nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653     if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654         return(-1);
1655 
1656     ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657     return(0);
1658 }
1659 
1660 /**
1661  * xmlParserNsGrow:
1662  * @ctxt: parser context
1663  *
1664  * Grows the namespace tables.
1665  *
1666  * Returns 0 on success, -1 if a memory allocation failed.
1667  */
1668 static int
xmlParserNsGrow(xmlParserCtxtPtr ctxt)1669 xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670     const xmlChar **table;
1671     xmlParserNsExtra *extra;
1672     int newSize;
1673 
1674     if (ctxt->nsMax > INT_MAX / 2)
1675         goto error;
1676     newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1677 
1678     table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1679     if (table == NULL)
1680         goto error;
1681     ctxt->nsTab = table;
1682 
1683     extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1684     if (extra == NULL)
1685         goto error;
1686     ctxt->nsdb->extra = extra;
1687 
1688     ctxt->nsMax = newSize;
1689     return(0);
1690 
1691 error:
1692     xmlErrMemory(ctxt);
1693     return(-1);
1694 }
1695 
1696 /**
1697  * xmlParserNsPush:
1698  * @ctxt: parser context
1699  * @prefix: prefix with hash value
1700  * @uri: uri with hash value
1701  * @saxData: extra data for SAX handler
1702  * @defAttr: whether the namespace comes from a default attribute
1703  *
1704  * Push a new namespace on the table.
1705  *
1706  * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1707  * -1 if a memory allocation failed.
1708  */
1709 static int
xmlParserNsPush(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,const xmlHashedString * uri,void * saxData,int defAttr)1710 xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1711                 const xmlHashedString *uri, void *saxData, int defAttr) {
1712     xmlParserNsBucket *bucket = NULL;
1713     xmlParserNsExtra *extra;
1714     const xmlChar **ns;
1715     unsigned hashValue, nsIndex, oldIndex;
1716 
1717     if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1718         return(0);
1719 
1720     if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1721         xmlErrMemory(ctxt);
1722         return(-1);
1723     }
1724 
1725     /*
1726      * Default namespace and 'xml' namespace
1727      */
1728     if ((prefix == NULL) || (prefix->name == NULL)) {
1729         oldIndex = ctxt->nsdb->defaultNsIndex;
1730 
1731         if (oldIndex != INT_MAX) {
1732             extra = &ctxt->nsdb->extra[oldIndex];
1733 
1734             if (extra->elementId == ctxt->nsdb->elementId) {
1735                 if (defAttr == 0)
1736                     xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1737                 return(0);
1738             }
1739 
1740             if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741                 (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1742                 return(0);
1743         }
1744 
1745         ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1746         goto populate_entry;
1747     }
1748 
1749     /*
1750      * Hash table lookup
1751      */
1752     oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1753     if (oldIndex != INT_MAX) {
1754         extra = &ctxt->nsdb->extra[oldIndex];
1755 
1756         /*
1757          * Check for duplicate definitions on the same element.
1758          */
1759         if (extra->elementId == ctxt->nsdb->elementId) {
1760             if (defAttr == 0)
1761                 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1762             return(0);
1763         }
1764 
1765         if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1766             (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1767             return(0);
1768 
1769         bucket->index = ctxt->nsNr;
1770         goto populate_entry;
1771     }
1772 
1773     /*
1774      * Insert new bucket
1775      */
1776 
1777     hashValue = prefix->hashValue;
1778 
1779     /*
1780      * Grow hash table, 50% fill factor
1781      */
1782     if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1783         xmlParserNsBucket *newHash;
1784         unsigned newSize, i, index;
1785 
1786         if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1787             xmlErrMemory(ctxt);
1788             return(-1);
1789         }
1790         newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1791         newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1792         if (newHash == NULL) {
1793             xmlErrMemory(ctxt);
1794             return(-1);
1795         }
1796         memset(newHash, 0, newSize * sizeof(newHash[0]));
1797 
1798         for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1799             unsigned hv = ctxt->nsdb->hash[i].hashValue;
1800             unsigned newIndex;
1801 
1802             if (hv == 0)
1803                 continue;
1804             newIndex = hv & (newSize - 1);
1805 
1806             while (newHash[newIndex].hashValue != 0) {
1807                 newIndex++;
1808                 if (newIndex == newSize)
1809                     newIndex = 0;
1810             }
1811 
1812             newHash[newIndex] = ctxt->nsdb->hash[i];
1813         }
1814 
1815         xmlFree(ctxt->nsdb->hash);
1816         ctxt->nsdb->hash = newHash;
1817         ctxt->nsdb->hashSize = newSize;
1818 
1819         /*
1820          * Relookup
1821          */
1822         index = hashValue & (newSize - 1);
1823 
1824         while (newHash[index].hashValue != 0) {
1825             index++;
1826             if (index == newSize)
1827                 index = 0;
1828         }
1829 
1830         bucket = &newHash[index];
1831     }
1832 
1833     bucket->hashValue = hashValue;
1834     bucket->index = ctxt->nsNr;
1835     ctxt->nsdb->hashElems++;
1836     oldIndex = INT_MAX;
1837 
1838 populate_entry:
1839     nsIndex = ctxt->nsNr;
1840 
1841     ns = &ctxt->nsTab[nsIndex * 2];
1842     ns[0] = prefix ? prefix->name : NULL;
1843     ns[1] = uri->name;
1844 
1845     extra = &ctxt->nsdb->extra[nsIndex];
1846     extra->saxData = saxData;
1847     extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1848     extra->uriHashValue = uri->hashValue;
1849     extra->elementId = ctxt->nsdb->elementId;
1850     extra->oldIndex = oldIndex;
1851 
1852     ctxt->nsNr++;
1853 
1854     return(1);
1855 }
1856 
1857 /**
1858  * xmlParserNsPop:
1859  * @ctxt: an XML parser context
1860  * @nr:  the number to pop
1861  *
1862  * Pops the top @nr namespaces and restores the hash table.
1863  *
1864  * Returns the number of namespaces popped.
1865  */
1866 static int
xmlParserNsPop(xmlParserCtxtPtr ctxt,int nr)1867 xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1868 {
1869     int i;
1870 
1871     /* assert(nr <= ctxt->nsNr); */
1872 
1873     for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1874         const xmlChar *prefix = ctxt->nsTab[i * 2];
1875         xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1876 
1877         if (prefix == NULL) {
1878             ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1879         } else {
1880             xmlHashedString hprefix;
1881             xmlParserNsBucket *bucket = NULL;
1882 
1883             hprefix.name = prefix;
1884             hprefix.hashValue = extra->prefixHashValue;
1885             xmlParserNsLookup(ctxt, &hprefix, &bucket);
1886             /* assert(bucket && bucket->hashValue); */
1887             bucket->index = extra->oldIndex;
1888         }
1889     }
1890 
1891     ctxt->nsNr -= nr;
1892     return(nr);
1893 }
1894 
1895 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1896 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1897     const xmlChar **atts;
1898     unsigned *attallocs;
1899     int maxatts;
1900 
1901     if (nr + 5 > ctxt->maxatts) {
1902 	maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1903 	atts = (const xmlChar **) xmlMalloc(
1904 				     maxatts * sizeof(const xmlChar *));
1905 	if (atts == NULL) goto mem_error;
1906 	attallocs = xmlRealloc(ctxt->attallocs,
1907                                (maxatts / 5) * sizeof(attallocs[0]));
1908 	if (attallocs == NULL) {
1909             xmlFree(atts);
1910             goto mem_error;
1911         }
1912         if (ctxt->maxatts > 0)
1913             memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1914         xmlFree(ctxt->atts);
1915 	ctxt->atts = atts;
1916 	ctxt->attallocs = attallocs;
1917 	ctxt->maxatts = maxatts;
1918     }
1919     return(ctxt->maxatts);
1920 mem_error:
1921     xmlErrMemory(ctxt);
1922     return(-1);
1923 }
1924 
1925 /**
1926  * inputPush:
1927  * @ctxt:  an XML parser context
1928  * @value:  the parser input
1929  *
1930  * Pushes a new parser input on top of the input stack
1931  *
1932  * Returns -1 in case of error, the index in the stack otherwise
1933  */
1934 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1935 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1936 {
1937     if ((ctxt == NULL) || (value == NULL))
1938         return(-1);
1939     if (ctxt->inputNr >= ctxt->inputMax) {
1940         size_t newSize = ctxt->inputMax * 2;
1941         xmlParserInputPtr *tmp;
1942 
1943         tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1944                                                newSize * sizeof(*tmp));
1945         if (tmp == NULL) {
1946             xmlErrMemory(ctxt);
1947             return (-1);
1948         }
1949         ctxt->inputTab = tmp;
1950         ctxt->inputMax = newSize;
1951     }
1952     ctxt->inputTab[ctxt->inputNr] = value;
1953     ctxt->input = value;
1954     return (ctxt->inputNr++);
1955 }
1956 /**
1957  * inputPop:
1958  * @ctxt: an XML parser context
1959  *
1960  * Pops the top parser input from the input stack
1961  *
1962  * Returns the input just removed
1963  */
1964 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1965 inputPop(xmlParserCtxtPtr ctxt)
1966 {
1967     xmlParserInputPtr ret;
1968 
1969     if (ctxt == NULL)
1970         return(NULL);
1971     if (ctxt->inputNr <= 0)
1972         return (NULL);
1973     ctxt->inputNr--;
1974     if (ctxt->inputNr > 0)
1975         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1976     else
1977         ctxt->input = NULL;
1978     ret = ctxt->inputTab[ctxt->inputNr];
1979     ctxt->inputTab[ctxt->inputNr] = NULL;
1980     return (ret);
1981 }
1982 /**
1983  * nodePush:
1984  * @ctxt:  an XML parser context
1985  * @value:  the element node
1986  *
1987  * DEPRECATED: Internal function, do not use.
1988  *
1989  * Pushes a new element node on top of the node stack
1990  *
1991  * Returns -1 in case of error, the index in the stack otherwise
1992  */
1993 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1994 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1995 {
1996     int maxDepth;
1997 
1998     if (ctxt == NULL)
1999         return(0);
2000 
2001     maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2002     if (ctxt->nodeNr > maxDepth) {
2003         xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2005                 ctxt->nodeNr);
2006         xmlHaltParser(ctxt);
2007         return(-1);
2008     }
2009     if (ctxt->nodeNr >= ctxt->nodeMax) {
2010         xmlNodePtr *tmp;
2011 
2012 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2013                                       ctxt->nodeMax * 2 *
2014                                       sizeof(ctxt->nodeTab[0]));
2015         if (tmp == NULL) {
2016             xmlErrMemory(ctxt);
2017             return (-1);
2018         }
2019         ctxt->nodeTab = tmp;
2020 	ctxt->nodeMax *= 2;
2021     }
2022     ctxt->nodeTab[ctxt->nodeNr] = value;
2023     ctxt->node = value;
2024     return (ctxt->nodeNr++);
2025 }
2026 
2027 /**
2028  * nodePop:
2029  * @ctxt: an XML parser context
2030  *
2031  * DEPRECATED: Internal function, do not use.
2032  *
2033  * Pops the top element node from the node stack
2034  *
2035  * Returns the node just removed
2036  */
2037 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)2038 nodePop(xmlParserCtxtPtr ctxt)
2039 {
2040     xmlNodePtr ret;
2041 
2042     if (ctxt == NULL) return(NULL);
2043     if (ctxt->nodeNr <= 0)
2044         return (NULL);
2045     ctxt->nodeNr--;
2046     if (ctxt->nodeNr > 0)
2047         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2048     else
2049         ctxt->node = NULL;
2050     ret = ctxt->nodeTab[ctxt->nodeNr];
2051     ctxt->nodeTab[ctxt->nodeNr] = NULL;
2052     return (ret);
2053 }
2054 
2055 /**
2056  * nameNsPush:
2057  * @ctxt:  an XML parser context
2058  * @value:  the element name
2059  * @prefix:  the element prefix
2060  * @URI:  the element namespace name
2061  * @line:  the current line number for error messages
2062  * @nsNr:  the number of namespaces pushed on the namespace table
2063  *
2064  * Pushes a new element name/prefix/URL on top of the name stack
2065  *
2066  * Returns -1 in case of error, the index in the stack otherwise
2067  */
2068 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)2069 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2070            const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2071 {
2072     xmlStartTag *tag;
2073 
2074     if (ctxt->nameNr >= ctxt->nameMax) {
2075         const xmlChar * *tmp;
2076         xmlStartTag *tmp2;
2077         ctxt->nameMax *= 2;
2078         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2079                                     ctxt->nameMax *
2080                                     sizeof(ctxt->nameTab[0]));
2081         if (tmp == NULL) {
2082 	    ctxt->nameMax /= 2;
2083 	    goto mem_error;
2084         }
2085 	ctxt->nameTab = tmp;
2086         tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2087                                     ctxt->nameMax *
2088                                     sizeof(ctxt->pushTab[0]));
2089         if (tmp2 == NULL) {
2090 	    ctxt->nameMax /= 2;
2091 	    goto mem_error;
2092         }
2093 	ctxt->pushTab = tmp2;
2094     } else if (ctxt->pushTab == NULL) {
2095         ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2096                                             sizeof(ctxt->pushTab[0]));
2097         if (ctxt->pushTab == NULL)
2098             goto mem_error;
2099     }
2100     ctxt->nameTab[ctxt->nameNr] = value;
2101     ctxt->name = value;
2102     tag = &ctxt->pushTab[ctxt->nameNr];
2103     tag->prefix = prefix;
2104     tag->URI = URI;
2105     tag->line = line;
2106     tag->nsNr = nsNr;
2107     return (ctxt->nameNr++);
2108 mem_error:
2109     xmlErrMemory(ctxt);
2110     return (-1);
2111 }
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack. The name below
 * it, if any, becomes the current name; otherwise the current name is
 * reset to NULL. The vacated table slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *top;
    int idx;

    if (ctxt->nameNr <= 0)
        return(NULL);

    idx = --ctxt->nameNr;
    ctxt->name = (idx > 0) ? ctxt->nameTab[idx - 1] : NULL;

    top = ctxt->nameTab[idx];
    ctxt->nameTab[idx] = NULL;
    return(top);
}
#endif /* LIBXML_PUSH_ENABLED */
2138 
2139 /**
2140  * namePush:
2141  * @ctxt:  an XML parser context
2142  * @value:  the element name
2143  *
2144  * DEPRECATED: Internal function, do not use.
2145  *
2146  * Pushes a new element name on top of the name stack
2147  *
2148  * Returns -1 in case of error, the index in the stack otherwise
2149  */
2150 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)2151 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2152 {
2153     if (ctxt == NULL) return (-1);
2154 
2155     if (ctxt->nameNr >= ctxt->nameMax) {
2156         const xmlChar * *tmp;
2157         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2158                                     ctxt->nameMax * 2 *
2159                                     sizeof(ctxt->nameTab[0]));
2160         if (tmp == NULL) {
2161 	    goto mem_error;
2162         }
2163 	ctxt->nameTab = tmp;
2164         ctxt->nameMax *= 2;
2165     }
2166     ctxt->nameTab[ctxt->nameNr] = value;
2167     ctxt->name = value;
2168     return (ctxt->nameNr++);
2169 mem_error:
2170     xmlErrMemory(ctxt);
2171     return (-1);
2172 }
2173 
2174 /**
2175  * namePop:
2176  * @ctxt: an XML parser context
2177  *
2178  * DEPRECATED: Internal function, do not use.
2179  *
2180  * Pops the top element name from the name stack
2181  *
2182  * Returns the name just removed
2183  */
2184 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)2185 namePop(xmlParserCtxtPtr ctxt)
2186 {
2187     const xmlChar *ret;
2188 
2189     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2190         return (NULL);
2191     ctxt->nameNr--;
2192     if (ctxt->nameNr > 0)
2193         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2194     else
2195         ctxt->name = NULL;
2196     ret = ctxt->nameTab[ctxt->nameNr];
2197     ctxt->nameTab[ctxt->nameNr] = NULL;
2198     return (ret);
2199 }
2200 
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store on the space stack
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new entry. The table is doubled when it is full.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int pos;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int doubled = ctxt->spaceMax * 2;
        int *grown;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   doubled * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            /* Capacity is left untouched when the table cannot grow. */
            xmlErrMemory(ctxt);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = doubled;
    }

    pos = ctxt->spaceNr++;
    ctxt->spaceTab[pos] = val;
    ctxt->space = &ctxt->spaceTab[pos];
    return(pos);
}
2219 
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the topmost value from the space stack. ctxt->space is moved to
 * the entry below it, or to the first table slot when the stack becomes
 * empty. The vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int idx;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    idx = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(idx > 0) ? idx - 1 : 0];

    popped = ctxt->spaceTab[idx];
    ctxt->spaceTab[idx] = -1;
    return(popped);
}
2232 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named `ctxt` (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser. The argument is
 *           evaluated twice.
 *   SKIPL(n) Skip n xmlChar one by one, keeping line and column up to date
 *           when newlines are crossed.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *   CMP4 .. CMP10 compare the first 4 .. 10 bytes at a pointer, read as
 *           unsigned char, against the given character constants.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW, NEXT1 and COPY_BUF expand to a bare statement rather than
 * a do { } while (0) block: write them as a statement of their own followed
 * by ';' and never as the unbraced body of an if that has an else branch.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2347 
2348 /**
2349  * xmlSkipBlankChars:
2350  * @ctxt:  the XML parser context
2351  *
2352  * DEPRECATED: Internal function, do not use.
2353  *
2354  * Skip whitespace in the input stream.
2355  *
2356  * Returns the number of space chars skipped
2357  */
2358 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2359 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2360     const xmlChar *cur;
2361     int res = 0;
2362 
2363     /*
2364      * It's Okay to use CUR/NEXT here since all the blanks are on
2365      * the ASCII range.
2366      */
2367     cur = ctxt->input->cur;
2368     while (IS_BLANK_CH(*cur)) {
2369         if (*cur == '\n') {
2370             ctxt->input->line++; ctxt->input->col = 1;
2371         } else {
2372             ctxt->input->col++;
2373         }
2374         cur++;
2375         if (res < INT_MAX)
2376             res++;
2377         if (*cur == 0) {
2378             ctxt->input->cur = cur;
2379             xmlParserGrow(ctxt);
2380             cur = ctxt->input->cur;
2381         }
2382     }
2383     ctxt->input->cur = cur;
2384 
2385     return(res);
2386 }
2387 
2388 static void
xmlPopPE(xmlParserCtxtPtr ctxt)2389 xmlPopPE(xmlParserCtxtPtr ctxt) {
2390     unsigned long consumed;
2391     xmlEntityPtr ent;
2392 
2393     ent = ctxt->input->entity;
2394 
2395     ent->flags &= ~XML_ENT_EXPANDING;
2396 
2397     if ((ent->flags & XML_ENT_CHECKED) == 0) {
2398         int result;
2399 
2400         /*
2401          * Read the rest of the stream in case of errors. We want
2402          * to account for the whole entity size.
2403          */
2404         do {
2405             ctxt->input->cur = ctxt->input->end;
2406             xmlParserShrink(ctxt);
2407             result = xmlParserGrow(ctxt);
2408         } while (result > 0);
2409 
2410         consumed = ctxt->input->consumed;
2411         xmlSaturatedAddSizeT(&consumed,
2412                              ctxt->input->end - ctxt->input->base);
2413 
2414         xmlSaturatedAdd(&ent->expandedSize, consumed);
2415 
2416         /*
2417          * Add to sizeentities when parsing an external entity
2418          * for the first time.
2419          */
2420         if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2421             xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2422         }
2423 
2424         ent->flags |= XML_ENT_CHECKED;
2425     }
2426 
2427     xmlPopInput(ctxt);
2428 
2429     xmlParserEntityCheck(ctxt, ent->expandedSize);
2430 }
2431 
2432 /**
2433  * xmlSkipBlankCharsPE:
2434  * @ctxt:  the XML parser context
2435  *
2436  * Skip whitespace in the input stream, also handling parameter
2437  * entities.
2438  *
2439  * Returns the number of space chars skipped
2440  */
2441 static int
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt)2442 xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2443     int res = 0;
2444     int inParam;
2445     int expandParam;
2446 
2447     inParam = PARSER_IN_PE(ctxt);
2448     expandParam = PARSER_EXTERNAL(ctxt);
2449 
2450     if (!inParam && !expandParam)
2451         return(xmlSkipBlankChars(ctxt));
2452 
2453     while (PARSER_STOPPED(ctxt) == 0) {
2454         if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2455             NEXT;
2456         } else if (CUR == '%') {
2457             if ((expandParam == 0) ||
2458                 (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2459                 break;
2460 
2461             /*
2462              * Expand parameter entity. We continue to consume
2463              * whitespace at the start of the entity and possible
2464              * even consume the whole entity and pop it. We might
2465              * even pop multiple PEs in this loop.
2466              */
2467             xmlParsePEReference(ctxt);
2468 
2469             inParam = PARSER_IN_PE(ctxt);
2470             expandParam = PARSER_EXTERNAL(ctxt);
2471         } else if (CUR == 0) {
2472             if (inParam == 0)
2473                 break;
2474 
2475             xmlPopPE(ctxt);
2476 
2477             inParam = PARSER_IN_PE(ctxt);
2478             expandParam = PARSER_EXTERNAL(ctxt);
2479         } else {
2480             break;
2481         }
2482 
2483         /*
2484          * Also increase the counter when entering or exiting a PERef.
2485          * The spec says: "When a parameter-entity reference is recognized
2486          * in the DTD and included, its replacement text MUST be enlarged
2487          * by the attachment of one leading and one following space (#x20)
2488          * character."
2489          */
2490         if (res < INT_MAX)
2491             res++;
2492     }
2493 
2494     return(res);
2495 }
2496 
2497 /************************************************************************
2498  *									*
2499  *		Commodity functions to handle entities			*
2500  *									*
2501  ************************************************************************/
2502 
2503 /**
2504  * xmlPopInput:
2505  * @ctxt:  an XML parser context
2506  *
2507  * xmlPopInput: the current input pointed by ctxt->input came to an end
2508  *          pop it and return the next char.
2509  *
2510  * Returns the current xmlChar in the parser context
2511  */
2512 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2513 xmlPopInput(xmlParserCtxtPtr ctxt) {
2514     xmlParserInputPtr input;
2515 
2516     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2517     input = inputPop(ctxt);
2518     xmlFreeInputStream(input);
2519     if (*ctxt->input->cur == 0)
2520         xmlParserGrow(ctxt);
2521     return(CUR);
2522 }
2523 
2524 /**
2525  * xmlPushInput:
2526  * @ctxt:  an XML parser context
2527  * @input:  an XML parser input fragment (entity, XML fragment ...).
2528  *
2529  * Push an input stream onto the stack.
2530  *
2531  * This makes the parser use an input returned from advanced functions
2532  * like xmlNewInputURL or xmlNewInputMemory.
2533  *
2534  * Returns -1 in case of error or the index in the input stack
2535  */
2536 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2537 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2538     int maxDepth;
2539     int ret;
2540 
2541     if ((ctxt == NULL) || (input == NULL))
2542         return(-1);
2543 
2544     maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2545     if (ctxt->inputNr > maxDepth) {
2546         xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2547                        "Maximum entity nesting depth exceeded");
2548         xmlHaltParser(ctxt);
2549 	return(-1);
2550     }
2551     ret = inputPush(ctxt, input);
2552     GROW;
2553     return(ret);
2554 }
2555 
2556 /**
2557  * xmlParseCharRef:
2558  * @ctxt:  an XML parser context
2559  *
2560  * DEPRECATED: Internal function, don't use.
2561  *
2562  * Parse a numeric character reference. Always consumes '&'.
2563  *
2564  * [66] CharRef ::= '&#' [0-9]+ ';' |
2565  *                  '&#x' [0-9a-fA-F]+ ';'
2566  *
2567  * [ WFC: Legal Character ]
2568  * Characters referred to using character references must match the
2569  * production for Char.
2570  *
2571  * Returns the value parsed (as an int), 0 in case of error
2572  */
2573 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2574 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2575     int val = 0;
2576     int count = 0;
2577 
2578     /*
2579      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2580      */
2581     if ((RAW == '&') && (NXT(1) == '#') &&
2582         (NXT(2) == 'x')) {
2583 	SKIP(3);
2584 	GROW;
2585 	while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2586 	    if (count++ > 20) {
2587 		count = 0;
2588 		GROW;
2589 	    }
2590 	    if ((RAW >= '0') && (RAW <= '9'))
2591 	        val = val * 16 + (CUR - '0');
2592 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2593 	        val = val * 16 + (CUR - 'a') + 10;
2594 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2595 	        val = val * 16 + (CUR - 'A') + 10;
2596 	    else {
2597 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2598 		val = 0;
2599 		break;
2600 	    }
2601 	    if (val > 0x110000)
2602 	        val = 0x110000;
2603 
2604 	    NEXT;
2605 	    count++;
2606 	}
2607 	if (RAW == ';') {
2608 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2609 	    ctxt->input->col++;
2610 	    ctxt->input->cur++;
2611 	}
2612     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2613 	SKIP(2);
2614 	GROW;
2615 	while (RAW != ';') { /* loop blocked by count */
2616 	    if (count++ > 20) {
2617 		count = 0;
2618 		GROW;
2619 	    }
2620 	    if ((RAW >= '0') && (RAW <= '9'))
2621 	        val = val * 10 + (CUR - '0');
2622 	    else {
2623 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2624 		val = 0;
2625 		break;
2626 	    }
2627 	    if (val > 0x110000)
2628 	        val = 0x110000;
2629 
2630 	    NEXT;
2631 	    count++;
2632 	}
2633 	if (RAW == ';') {
2634 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2635 	    ctxt->input->col++;
2636 	    ctxt->input->cur++;
2637 	}
2638     } else {
2639         if (RAW == '&')
2640             SKIP(1);
2641         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2642     }
2643 
2644     /*
2645      * [ WFC: Legal Character ]
2646      * Characters referred to using character references must match the
2647      * production for Char.
2648      */
2649     if (val >= 0x110000) {
2650         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2651                 "xmlParseCharRef: character reference out of bounds\n",
2652 	        val);
2653     } else if (IS_CHAR(val)) {
2654         return(val);
2655     } else {
2656         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2657                           "xmlParseCharRef: invalid xmlChar value %d\n",
2658 	                  val);
2659     }
2660     return(0);
2661 }
2662 
2663 /**
2664  * xmlParseStringCharRef:
2665  * @ctxt:  an XML parser context
2666  * @str:  a pointer to an index in the string
2667  *
2668  * parse Reference declarations, variant parsing from a string rather
2669  * than an an input flow.
2670  *
2671  * [66] CharRef ::= '&#' [0-9]+ ';' |
2672  *                  '&#x' [0-9a-fA-F]+ ';'
2673  *
2674  * [ WFC: Legal Character ]
2675  * Characters referred to using character references must match the
2676  * production for Char.
2677  *
2678  * Returns the value parsed (as an int), 0 in case of error, str will be
2679  *         updated to the current value of the index
2680  */
2681 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2682 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2683     const xmlChar *ptr;
2684     xmlChar cur;
2685     int val = 0;
2686 
2687     if ((str == NULL) || (*str == NULL)) return(0);
2688     ptr = *str;
2689     cur = *ptr;
2690     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2691 	ptr += 3;
2692 	cur = *ptr;
2693 	while (cur != ';') { /* Non input consuming loop */
2694 	    if ((cur >= '0') && (cur <= '9'))
2695 	        val = val * 16 + (cur - '0');
2696 	    else if ((cur >= 'a') && (cur <= 'f'))
2697 	        val = val * 16 + (cur - 'a') + 10;
2698 	    else if ((cur >= 'A') && (cur <= 'F'))
2699 	        val = val * 16 + (cur - 'A') + 10;
2700 	    else {
2701 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2702 		val = 0;
2703 		break;
2704 	    }
2705 	    if (val > 0x110000)
2706 	        val = 0x110000;
2707 
2708 	    ptr++;
2709 	    cur = *ptr;
2710 	}
2711 	if (cur == ';')
2712 	    ptr++;
2713     } else if  ((cur == '&') && (ptr[1] == '#')){
2714 	ptr += 2;
2715 	cur = *ptr;
2716 	while (cur != ';') { /* Non input consuming loops */
2717 	    if ((cur >= '0') && (cur <= '9'))
2718 	        val = val * 10 + (cur - '0');
2719 	    else {
2720 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2721 		val = 0;
2722 		break;
2723 	    }
2724 	    if (val > 0x110000)
2725 	        val = 0x110000;
2726 
2727 	    ptr++;
2728 	    cur = *ptr;
2729 	}
2730 	if (cur == ';')
2731 	    ptr++;
2732     } else {
2733 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2734 	return(0);
2735     }
2736     *str = ptr;
2737 
2738     /*
2739      * [ WFC: Legal Character ]
2740      * Characters referred to using character references must match the
2741      * production for Char.
2742      */
2743     if (val >= 0x110000) {
2744         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2745                 "xmlParseStringCharRef: character reference out of bounds\n",
2746                 val);
2747     } else if (IS_CHAR(val)) {
2748         return(val);
2749     } else {
2750         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2751 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2752 			  val);
2753     }
2754     return(0);
2755 }
2756 
2757 /**
2758  * xmlParserHandlePEReference:
2759  * @ctxt:  the parser context
2760  *
2761  * DEPRECATED: Internal function, do not use.
2762  *
2763  * [69] PEReference ::= '%' Name ';'
2764  *
2765  * [ WFC: No Recursion ]
2766  * A parsed entity must not contain a recursive
2767  * reference to itself, either directly or indirectly.
2768  *
2769  * [ WFC: Entity Declared ]
2770  * In a document without any DTD, a document with only an internal DTD
2771  * subset which contains no parameter entity references, or a document
2772  * with "standalone='yes'", ...  ... The declaration of a parameter
2773  * entity must precede any reference to it...
2774  *
2775  * [ VC: Entity Declared ]
2776  * In a document with an external subset or external parameter entities
2777  * with "standalone='no'", ...  ... The declaration of a parameter entity
2778  * must precede any reference to it...
2779  *
2780  * [ WFC: In DTD ]
2781  * Parameter-entity references may only appear in the DTD.
2782  * NOTE: misleading but this is handled.
2783  *
2784  * A PEReference may have been detected in the current input stream
2785  * the handling is done accordingly to
2786  *      http://www.w3.org/TR/REC-xml#entproc
2787  * i.e.
2788  *   - Included in literal in entity values
2789  *   - Included as Parameter Entity reference within DTDs
2790  */
2791 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2792 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2793     xmlParsePEReference(ctxt);
2794 }
2795 
2796 /**
2797  * xmlStringLenDecodeEntities:
2798  * @ctxt:  the parser context
2799  * @str:  the input string
2800  * @len: the string length
2801  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802  * @end:  an end marker xmlChar, 0 if none
2803  * @end2:  an end marker xmlChar, 0 if none
2804  * @end3:  an end marker xmlChar, 0 if none
2805  *
2806  * DEPRECATED: Internal function, don't use.
2807  *
2808  * Returns A newly allocated string with the substitution done. The caller
2809  *      must deallocate it !
2810  */
2811 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2812 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813                            int what ATTRIBUTE_UNUSED,
2814                            xmlChar end, xmlChar end2, xmlChar end3) {
2815     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2816         return(NULL);
2817 
2818     if ((str[len] != 0) ||
2819         (end != 0) || (end2 != 0) || (end3 != 0))
2820         return(NULL);
2821 
2822     return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2823 }
2824 
2825 /**
2826  * xmlStringDecodeEntities:
2827  * @ctxt:  the parser context
2828  * @str:  the input string
2829  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2830  * @end:  an end marker xmlChar, 0 if none
2831  * @end2:  an end marker xmlChar, 0 if none
2832  * @end3:  an end marker xmlChar, 0 if none
2833  *
2834  * DEPRECATED: Internal function, don't use.
2835  *
2836  * Returns A newly allocated string with the substitution done. The caller
2837  *      must deallocate it !
2838  */
2839 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2840 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2841                         int what ATTRIBUTE_UNUSED,
2842 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2843     if ((ctxt == NULL) || (str == NULL))
2844         return(NULL);
2845 
2846     if ((end != 0) || (end2 != 0) || (end3 != 0))
2847         return(NULL);
2848 
2849     return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2850 }
2851 
2852 /************************************************************************
2853  *									*
2854  *		Commodity functions, cleanup needed ?			*
2855  *									*
2856  ************************************************************************/
2857 
2858 /**
2859  * areBlanks:
2860  * @ctxt:  an XML parser context
2861  * @str:  a xmlChar *
2862  * @len:  the size of @str
2863  * @blank_chars: we know the chars are blanks
2864  *
2865  * Is this a sequence of blank chars that one can ignore ?
2866  *
2867  * Returns 1 if ignorable 0 otherwise.
2868  */
2869 
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                     int blank_chars) {
    xmlNodePtr parent;
    xmlNodePtr last;
    int pos;

    /*
     * When both SAX callbacks are the same function there is nothing
     * to gain from classifying the text.
     */
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
        return(0);

    /*
     * xml:space handling: no space stack, or a value of 1 or -2 on top
     * of it, means the text is never reported as ignorable.
     */
    if (ctxt->space == NULL)
        return(0);
    if ((*(ctxt->space) == 1) || (*(ctxt->space) == -2))
        return(0);

    /*
     * Unless the caller vouches for it (blank_chars != 0), verify that
     * every byte really is a blank.
     */
    if (blank_chars == 0) {
        for (pos = 0; pos < len; pos++) {
            if (!(IS_BLANK_CH(str[pos])))
                return(0);
        }
    }

    parent = ctxt->node;
    if (parent == NULL)
        return(0);

    /*
     * If a DTD declares the current element, its content type decides:
     * element-only content makes blanks ignorable, ANY or mixed content
     * does not. Other declared types fall through to the heuristic.
     */
    if (ctxt->myDoc != NULL) {
        xmlDocPtr doc = ctxt->myDoc;
        xmlElementPtr decl = NULL;
        const xmlChar *prefix = NULL;

        if (parent->ns)
            prefix = parent->ns->prefix;

        if (doc->intSubset != NULL)
            decl = xmlHashLookup2(doc->intSubset->elements, parent->name,
                                  prefix);
        if ((decl == NULL) && (doc->extSubset != NULL))
            decl = xmlHashLookup2(doc->extSubset->elements, parent->name,
                                  prefix);

        if (decl != NULL) {
            switch (decl->etype) {
                case XML_ELEMENT_TYPE_ELEMENT:
                    return(1);
                case XML_ELEMENT_TYPE_ANY:
                case XML_ELEMENT_TYPE_MIXED:
                    return(0);
                default:
                    break;
            }
        }
    }

    /*
     * No usable declaration: guess from what follows the text and from
     * the children already attached to the current node.
     */
    if ((RAW != '<') && (RAW != 0xD))
        return(0);
    if ((parent->children == NULL) && (RAW == '<') && (NXT(1) == '/'))
        return(0);

    last = xmlGetLastChild(parent);
    if (last == NULL) {
        if ((parent->type != XML_ELEMENT_NODE) && (parent->content != NULL))
            return(0);
        return(1);
    }
    if (xmlNodeIsText(last))
        return(0);
    if ((parent->children != NULL) && (xmlNodeIsText(parent->children)))
        return(0);

    return(1);
}
2941 
2942 /************************************************************************
2943  *									*
2944  *		Extra stuff for namespace support			*
2945  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2946  *									*
2947  ************************************************************************/
2948 
2949 /**
2950  * xmlSplitQName:
2951  * @ctxt:  an XML parser context
2952  * @name:  an XML parser context
2953  * @prefixOut:  a xmlChar **
2954  *
2955  * parse an UTF8 encoded XML qualified name string
2956  *
2957  * [NS 5] QName ::= (Prefix ':')? LocalPart
2958  *
2959  * [NS 6] Prefix ::= NCName
2960  *
2961  * [NS 7] LocalPart ::= NCName
2962  *
2963  * Returns the local part, and prefix is updated
2964  *   to get the Prefix if any.
2965  */
2966 
2967 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefixOut)2968 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2969     xmlChar buf[XML_MAX_NAMELEN + 5];
2970     xmlChar *buffer = NULL;
2971     int len = 0;
2972     int max = XML_MAX_NAMELEN;
2973     xmlChar *ret = NULL;
2974     xmlChar *prefix;
2975     const xmlChar *cur = name;
2976     int c;
2977 
2978     if (prefixOut == NULL) return(NULL);
2979     *prefixOut = NULL;
2980 
2981     if (cur == NULL) return(NULL);
2982 
2983 #ifndef XML_XML_NAMESPACE
2984     /* xml: prefix is not really a namespace */
2985     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986         (cur[2] == 'l') && (cur[3] == ':'))
2987 	return(xmlStrdup(name));
2988 #endif
2989 
2990     /* nasty but well=formed */
2991     if (cur[0] == ':')
2992 	return(xmlStrdup(name));
2993 
2994     c = *cur++;
2995     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996 	buf[len++] = c;
2997 	c = *cur++;
2998     }
2999     if (len >= max) {
3000 	/*
3001 	 * Okay someone managed to make a huge name, so he's ready to pay
3002 	 * for the processing speed.
3003 	 */
3004 	max = len * 2;
3005 
3006 	buffer = (xmlChar *) xmlMallocAtomic(max);
3007 	if (buffer == NULL) {
3008 	    xmlErrMemory(ctxt);
3009 	    return(NULL);
3010 	}
3011 	memcpy(buffer, buf, len);
3012 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013 	    if (len + 10 > max) {
3014 	        xmlChar *tmp;
3015 
3016 		max *= 2;
3017 		tmp = (xmlChar *) xmlRealloc(buffer, max);
3018 		if (tmp == NULL) {
3019 		    xmlFree(buffer);
3020 		    xmlErrMemory(ctxt);
3021 		    return(NULL);
3022 		}
3023 		buffer = tmp;
3024 	    }
3025 	    buffer[len++] = c;
3026 	    c = *cur++;
3027 	}
3028 	buffer[len] = 0;
3029     }
3030 
3031     if ((c == ':') && (*cur == 0)) {
3032         if (buffer != NULL)
3033 	    xmlFree(buffer);
3034 	return(xmlStrdup(name));
3035     }
3036 
3037     if (buffer == NULL) {
3038 	ret = xmlStrndup(buf, len);
3039         if (ret == NULL) {
3040 	    xmlErrMemory(ctxt);
3041 	    return(NULL);
3042         }
3043     } else {
3044 	ret = buffer;
3045 	buffer = NULL;
3046 	max = XML_MAX_NAMELEN;
3047     }
3048 
3049 
3050     if (c == ':') {
3051 	c = *cur;
3052         prefix = ret;
3053 	if (c == 0) {
3054 	    ret = xmlStrndup(BAD_CAST "", 0);
3055             if (ret == NULL) {
3056                 xmlFree(prefix);
3057                 return(NULL);
3058             }
3059             *prefixOut = prefix;
3060             return(ret);
3061 	}
3062 	len = 0;
3063 
3064 	/*
3065 	 * Check that the first character is proper to start
3066 	 * a new name
3067 	 */
3068 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3069 	      ((c >= 0x41) && (c <= 0x5A)) ||
3070 	      (c == '_') || (c == ':'))) {
3071 	    int l;
3072 	    int first = CUR_SCHAR(cur, l);
3073 
3074 	    if (!IS_LETTER(first) && (first != '_')) {
3075 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3076 			    "Name %s is not XML Namespace compliant\n",
3077 				  name);
3078 	    }
3079 	}
3080 	cur++;
3081 
3082 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3083 	    buf[len++] = c;
3084 	    c = *cur++;
3085 	}
3086 	if (len >= max) {
3087 	    /*
3088 	     * Okay someone managed to make a huge name, so he's ready to pay
3089 	     * for the processing speed.
3090 	     */
3091 	    max = len * 2;
3092 
3093 	    buffer = (xmlChar *) xmlMallocAtomic(max);
3094 	    if (buffer == NULL) {
3095 	        xmlErrMemory(ctxt);
3096                 xmlFree(prefix);
3097 		return(NULL);
3098 	    }
3099 	    memcpy(buffer, buf, len);
3100 	    while (c != 0) { /* tested bigname2.xml */
3101 		if (len + 10 > max) {
3102 		    xmlChar *tmp;
3103 
3104 		    max *= 2;
3105 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3106 		    if (tmp == NULL) {
3107 			xmlErrMemory(ctxt);
3108                         xmlFree(prefix);
3109 			xmlFree(buffer);
3110 			return(NULL);
3111 		    }
3112 		    buffer = tmp;
3113 		}
3114 		buffer[len++] = c;
3115 		c = *cur++;
3116 	    }
3117 	    buffer[len] = 0;
3118 	}
3119 
3120 	if (buffer == NULL) {
3121 	    ret = xmlStrndup(buf, len);
3122             if (ret == NULL) {
3123                 xmlFree(prefix);
3124                 return(NULL);
3125             }
3126 	} else {
3127 	    ret = buffer;
3128 	}
3129 
3130         *prefixOut = prefix;
3131     }
3132 
3133     return(ret);
3134 }
3135 
3136 /************************************************************************
3137  *									*
3138  *			The parser itself				*
3139  *	Relates to http://www.w3.org/TR/REC-xml				*
3140  *									*
3141  ************************************************************************/
3142 
3143 /************************************************************************
3144  *									*
3145  *	Routines to parse Name, NCName and NmToken			*
3146  *									*
3147  ************************************************************************/
3148 
3149 /*
3150  * The two following functions are related to the change of accepted
3151  * characters for Name and NmToken in the Revision 5 of XML-1.0
3152  * They correspond to the modified production [4] and the new production [4a]
3153  * changes in that revision. Also note that the macros used for the
3154  * productions Letter, Digit, CombiningChar and Extender are not needed
3155  * anymore.
3156  * We still keep compatibility to pre-revision5 parsing semantic if the
3157  * new XML_PARSE_OLD10 option is given to the parser.
3158  */
3159 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3160 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3161     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162         /*
3163 	 * Use the new checks of production [4] [4a] amd [5] of the
3164 	 * Update 5 of XML-1.0
3165 	 */
3166 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3167 	    (((c >= 'a') && (c <= 'z')) ||
3168 	     ((c >= 'A') && (c <= 'Z')) ||
3169 	     (c == '_') || (c == ':') ||
3170 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3171 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3172 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3173 	     ((c >= 0x370) && (c <= 0x37D)) ||
3174 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3176 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3177 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3182 	    return(1);
3183     } else {
3184         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3185 	    return(1);
3186     }
3187     return(0);
3188 }
3189 
3190 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3191 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3192     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3193         /*
3194 	 * Use the new checks of production [4] [4a] amd [5] of the
3195 	 * Update 5 of XML-1.0
3196 	 */
3197 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3198 	    (((c >= 'a') && (c <= 'z')) ||
3199 	     ((c >= 'A') && (c <= 'Z')) ||
3200 	     ((c >= '0') && (c <= '9')) || /* !start */
3201 	     (c == '_') || (c == ':') ||
3202 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3203 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3204 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3205 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3206 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3207 	     ((c >= 0x370) && (c <= 0x37D)) ||
3208 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3209 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3210 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3211 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3212 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3213 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3214 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3215 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3216 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3217 	     return(1);
3218     } else {
3219         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3220             (c == '.') || (c == '-') ||
3221 	    (c == '_') || (c == ':') ||
3222 	    (IS_COMBINING(c)) ||
3223 	    (IS_EXTENDER(c)))
3224 	    return(1);
3225     }
3226     return(0);
3227 }
3228 
3229 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3230 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3231     const xmlChar *ret;
3232     int len = 0, l;
3233     int c;
3234     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3235                     XML_MAX_TEXT_LENGTH :
3236                     XML_MAX_NAME_LENGTH;
3237 
3238     /*
3239      * Handler for more complex cases
3240      */
3241     c = CUR_CHAR(l);
3242     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243         /*
3244 	 * Use the new checks of production [4] [4a] amd [5] of the
3245 	 * Update 5 of XML-1.0
3246 	 */
3247 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248 	    (!(((c >= 'a') && (c <= 'z')) ||
3249 	       ((c >= 'A') && (c <= 'Z')) ||
3250 	       (c == '_') || (c == ':') ||
3251 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3252 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3253 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3254 	       ((c >= 0x370) && (c <= 0x37D)) ||
3255 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3257 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3258 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263 	    return(NULL);
3264 	}
3265 	len += l;
3266 	NEXTL(l);
3267 	c = CUR_CHAR(l);
3268 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269 	       (((c >= 'a') && (c <= 'z')) ||
3270 	        ((c >= 'A') && (c <= 'Z')) ||
3271 	        ((c >= '0') && (c <= '9')) || /* !start */
3272 	        (c == '_') || (c == ':') ||
3273 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3275 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3276 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3277 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278 	        ((c >= 0x370) && (c <= 0x37D)) ||
3279 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3281 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3283 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3288 		)) {
3289             if (len <= INT_MAX - l)
3290 	        len += l;
3291 	    NEXTL(l);
3292 	    c = CUR_CHAR(l);
3293 	}
3294     } else {
3295 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296 	    (!IS_LETTER(c) && (c != '_') &&
3297 	     (c != ':'))) {
3298 	    return(NULL);
3299 	}
3300 	len += l;
3301 	NEXTL(l);
3302 	c = CUR_CHAR(l);
3303 
3304 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306 		(c == '.') || (c == '-') ||
3307 		(c == '_') || (c == ':') ||
3308 		(IS_COMBINING(c)) ||
3309 		(IS_EXTENDER(c)))) {
3310             if (len <= INT_MAX - l)
3311 	        len += l;
3312 	    NEXTL(l);
3313 	    c = CUR_CHAR(l);
3314 	}
3315     }
3316     if (len > maxLength) {
3317         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318         return(NULL);
3319     }
3320     if (ctxt->input->cur - ctxt->input->base < len) {
3321         /*
3322          * There were a couple of bugs where PERefs lead to to a change
3323          * of the buffer. Check the buffer size to avoid passing an invalid
3324          * pointer to xmlDictLookup.
3325          */
3326         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3327                     "unexpected change of input buffer");
3328         return (NULL);
3329     }
3330     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3331         ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3332     else
3333         ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3334     if (ret == NULL)
3335         xmlErrMemory(ctxt);
3336     return(ret);
3337 }
3338 
3339 /**
3340  * xmlParseName:
3341  * @ctxt:  an XML parser context
3342  *
3343  * DEPRECATED: Internal function, don't use.
3344  *
3345  * parse an XML name.
3346  *
3347  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3348  *                  CombiningChar | Extender
3349  *
3350  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351  *
3352  * [6] Names ::= Name (#x20 Name)*
3353  *
3354  * Returns the Name parsed or NULL
3355  */
3356 
3357 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3358 xmlParseName(xmlParserCtxtPtr ctxt) {
3359     const xmlChar *in;
3360     const xmlChar *ret;
3361     size_t count = 0;
3362     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3363                        XML_MAX_TEXT_LENGTH :
3364                        XML_MAX_NAME_LENGTH;
3365 
3366     GROW;
3367 
3368     /*
3369      * Accelerator for simple ASCII names
3370      */
3371     in = ctxt->input->cur;
3372     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 	((*in >= 0x41) && (*in <= 0x5A)) ||
3374 	(*in == '_') || (*in == ':')) {
3375 	in++;
3376 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3379 	       (*in == '_') || (*in == '-') ||
3380 	       (*in == ':') || (*in == '.'))
3381 	    in++;
3382 	if ((*in > 0) && (*in < 0x80)) {
3383 	    count = in - ctxt->input->cur;
3384             if (count > maxLength) {
3385                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386                 return(NULL);
3387             }
3388 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389 	    ctxt->input->cur = in;
3390 	    ctxt->input->col += count;
3391 	    if (ret == NULL)
3392 	        xmlErrMemory(ctxt);
3393 	    return(ret);
3394 	}
3395     }
3396     /* accelerator for special cases */
3397     return(xmlParseNameComplex(ctxt));
3398 }
3399 
3400 static xmlHashedString
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3401 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402     xmlHashedString ret;
3403     int len = 0, l;
3404     int c;
3405     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406                     XML_MAX_TEXT_LENGTH :
3407                     XML_MAX_NAME_LENGTH;
3408     size_t startPosition = 0;
3409 
3410     ret.name = NULL;
3411     ret.hashValue = 0;
3412 
3413     /*
3414      * Handler for more complex cases
3415      */
3416     startPosition = CUR_PTR - BASE_PTR;
3417     c = CUR_CHAR(l);
3418     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420 	return(ret);
3421     }
3422 
3423     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425         if (len <= INT_MAX - l)
3426 	    len += l;
3427 	NEXTL(l);
3428 	c = CUR_CHAR(l);
3429     }
3430     if (len > maxLength) {
3431         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432         return(ret);
3433     }
3434     ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3435     if (ret.name == NULL)
3436         xmlErrMemory(ctxt);
3437     return(ret);
3438 }
3439 
3440 /**
3441  * xmlParseNCName:
3442  * @ctxt:  an XML parser context
3443  * @len:  length of the string parsed
3444  *
3445  * parse an XML name.
3446  *
3447  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3448  *                      CombiningChar | Extender
3449  *
3450  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3451  *
3452  * Returns the Name parsed or NULL
3453  */
3454 
3455 static xmlHashedString
xmlParseNCName(xmlParserCtxtPtr ctxt)3456 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3457     const xmlChar *in, *e;
3458     xmlHashedString ret;
3459     size_t count = 0;
3460     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3461                        XML_MAX_TEXT_LENGTH :
3462                        XML_MAX_NAME_LENGTH;
3463 
3464     ret.name = NULL;
3465 
3466     /*
3467      * Accelerator for simple ASCII names
3468      */
3469     in = ctxt->input->cur;
3470     e = ctxt->input->end;
3471     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3473 	 (*in == '_')) && (in < e)) {
3474 	in++;
3475 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3477 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3478 	        (*in == '_') || (*in == '-') ||
3479 	        (*in == '.')) && (in < e))
3480 	    in++;
3481 	if (in >= e)
3482 	    goto complex;
3483 	if ((*in > 0) && (*in < 0x80)) {
3484 	    count = in - ctxt->input->cur;
3485             if (count > maxLength) {
3486                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3487                 return(ret);
3488             }
3489 	    ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3490 	    ctxt->input->cur = in;
3491 	    ctxt->input->col += count;
3492 	    if (ret.name == NULL) {
3493 	        xmlErrMemory(ctxt);
3494 	    }
3495 	    return(ret);
3496 	}
3497     }
3498 complex:
3499     return(xmlParseNCNameComplex(ctxt));
3500 }
3501 
3502 /**
3503  * xmlParseNameAndCompare:
3504  * @ctxt:  an XML parser context
3505  *
3506  * parse an XML name and compares for match
3507  * (specialized for endtag parsing)
3508  *
3509  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3510  * and the name for mismatch
3511  */
3512 
3513 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3514 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3515     register const xmlChar *cmp = other;
3516     register const xmlChar *in;
3517     const xmlChar *ret;
3518 
3519     GROW;
3520 
3521     in = ctxt->input->cur;
3522     while (*in != 0 && *in == *cmp) {
3523 	++in;
3524 	++cmp;
3525     }
3526     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3527 	/* success */
3528 	ctxt->input->col += in - ctxt->input->cur;
3529 	ctxt->input->cur = in;
3530 	return (const xmlChar*) 1;
3531     }
3532     /* failure (or end of input buffer), check with full function */
3533     ret = xmlParseName (ctxt);
3534     /* strings coming from the dictionary direct compare possible */
3535     if (ret == other) {
3536 	return (const xmlChar*) 1;
3537     }
3538     return ret;
3539 }
3540 
3541 /**
3542  * xmlParseStringName:
3543  * @ctxt:  an XML parser context
3544  * @str:  a pointer to the string pointer (IN/OUT)
3545  *
3546  * parse an XML name.
3547  *
3548  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3549  *                  CombiningChar | Extender
3550  *
3551  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3552  *
3553  * [6] Names ::= Name (#x20 Name)*
3554  *
3555  * Returns the Name parsed or NULL. The @str pointer
3556  * is updated to the current location in the string.
3557  */
3558 
3559 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3560 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3561     xmlChar buf[XML_MAX_NAMELEN + 5];
3562     xmlChar *ret;
3563     const xmlChar *cur = *str;
3564     int len = 0, l;
3565     int c;
3566     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3567                     XML_MAX_TEXT_LENGTH :
3568                     XML_MAX_NAME_LENGTH;
3569 
3570     c = CUR_SCHAR(cur, l);
3571     if (!xmlIsNameStartChar(ctxt, c)) {
3572 	return(NULL);
3573     }
3574 
3575     COPY_BUF(buf, len, c);
3576     cur += l;
3577     c = CUR_SCHAR(cur, l);
3578     while (xmlIsNameChar(ctxt, c)) {
3579 	COPY_BUF(buf, len, c);
3580 	cur += l;
3581 	c = CUR_SCHAR(cur, l);
3582 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583 	    /*
3584 	     * Okay someone managed to make a huge name, so he's ready to pay
3585 	     * for the processing speed.
3586 	     */
3587 	    xmlChar *buffer;
3588 	    int max = len * 2;
3589 
3590 	    buffer = (xmlChar *) xmlMallocAtomic(max);
3591 	    if (buffer == NULL) {
3592 	        xmlErrMemory(ctxt);
3593 		return(NULL);
3594 	    }
3595 	    memcpy(buffer, buf, len);
3596 	    while (xmlIsNameChar(ctxt, c)) {
3597 		if (len + 10 > max) {
3598 		    xmlChar *tmp;
3599 
3600 		    max *= 2;
3601 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3602 		    if (tmp == NULL) {
3603 			xmlErrMemory(ctxt);
3604 			xmlFree(buffer);
3605 			return(NULL);
3606 		    }
3607 		    buffer = tmp;
3608 		}
3609 		COPY_BUF(buffer, len, c);
3610 		cur += l;
3611 		c = CUR_SCHAR(cur, l);
3612                 if (len > maxLength) {
3613                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614                     xmlFree(buffer);
3615                     return(NULL);
3616                 }
3617 	    }
3618 	    buffer[len] = 0;
3619 	    *str = cur;
3620 	    return(buffer);
3621 	}
3622     }
3623     if (len > maxLength) {
3624         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3625         return(NULL);
3626     }
3627     *str = cur;
3628     ret = xmlStrndup(buf, len);
3629     if (ret == NULL)
3630         xmlErrMemory(ctxt);
3631     return(ret);
3632 }
3633 
3634 /**
3635  * xmlParseNmtoken:
3636  * @ctxt:  an XML parser context
3637  *
3638  * DEPRECATED: Internal function, don't use.
3639  *
3640  * parse an XML Nmtoken.
3641  *
3642  * [7] Nmtoken ::= (NameChar)+
3643  *
3644  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3645  *
3646  * Returns the Nmtoken parsed or NULL
3647  */
3648 
3649 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3650 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3651     xmlChar buf[XML_MAX_NAMELEN + 5];
3652     xmlChar *ret;
3653     int len = 0, l;
3654     int c;
3655     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3656                     XML_MAX_TEXT_LENGTH :
3657                     XML_MAX_NAME_LENGTH;
3658 
3659     c = CUR_CHAR(l);
3660 
3661     while (xmlIsNameChar(ctxt, c)) {
3662 	COPY_BUF(buf, len, c);
3663 	NEXTL(l);
3664 	c = CUR_CHAR(l);
3665 	if (len >= XML_MAX_NAMELEN) {
3666 	    /*
3667 	     * Okay someone managed to make a huge token, so he's ready to pay
3668 	     * for the processing speed.
3669 	     */
3670 	    xmlChar *buffer;
3671 	    int max = len * 2;
3672 
3673 	    buffer = (xmlChar *) xmlMallocAtomic(max);
3674 	    if (buffer == NULL) {
3675 	        xmlErrMemory(ctxt);
3676 		return(NULL);
3677 	    }
3678 	    memcpy(buffer, buf, len);
3679 	    while (xmlIsNameChar(ctxt, c)) {
3680 		if (len + 10 > max) {
3681 		    xmlChar *tmp;
3682 
3683 		    max *= 2;
3684 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3685 		    if (tmp == NULL) {
3686 			xmlErrMemory(ctxt);
3687 			xmlFree(buffer);
3688 			return(NULL);
3689 		    }
3690 		    buffer = tmp;
3691 		}
3692 		COPY_BUF(buffer, len, c);
3693                 if (len > maxLength) {
3694                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695                     xmlFree(buffer);
3696                     return(NULL);
3697                 }
3698 		NEXTL(l);
3699 		c = CUR_CHAR(l);
3700 	    }
3701 	    buffer[len] = 0;
3702 	    return(buffer);
3703 	}
3704     }
3705     if (len == 0)
3706         return(NULL);
3707     if (len > maxLength) {
3708         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709         return(NULL);
3710     }
3711     ret = xmlStrndup(buf, len);
3712     if (ret == NULL)
3713         xmlErrMemory(ctxt);
3714     return(ret);
3715 }
3716 
3717 /**
3718  * xmlExpandPEsInEntityValue:
3719  * @ctxt:  parser context
3720  * @buf:  string buffer
3721  * @str:  entity value
3722  * @length:  size of entity value
3723  * @depth:  nesting depth
3724  *
3725  * Validate an entity value and expand parameter entities.
3726  */
3727 static void
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,int length,int depth)3728 xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3729                           const xmlChar *str, int length, int depth) {
3730     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3731     const xmlChar *end, *chunk;
3732     int c, l;
3733 
3734     if (str == NULL)
3735         return;
3736 
3737     depth += 1;
3738     if (depth > maxDepth) {
3739 	xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3740                        "Maximum entity nesting depth exceeded");
3741 	return;
3742     }
3743 
3744     end = str + length;
3745     chunk = str;
3746 
3747     while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3748         c = *str;
3749 
3750         if (c >= 0x80) {
3751             l = xmlUTF8MultibyteLen(ctxt, str,
3752                     "invalid character in entity value\n");
3753             if (l == 0) {
3754                 if (chunk < str)
3755                     xmlSBufAddString(buf, chunk, str - chunk);
3756                 xmlSBufAddReplChar(buf);
3757                 str += 1;
3758                 chunk = str;
3759             } else {
3760                 str += l;
3761             }
3762         } else if (c == '&') {
3763             if (str[1] == '#') {
3764                 if (chunk < str)
3765                     xmlSBufAddString(buf, chunk, str - chunk);
3766 
3767                 c = xmlParseStringCharRef(ctxt, &str);
3768                 if (c == 0)
3769                     return;
3770 
3771                 xmlSBufAddChar(buf, c);
3772 
3773                 chunk = str;
3774             } else {
3775                 xmlChar *name;
3776 
3777                 /*
3778                  * General entity references are checked for
3779                  * syntactic validity.
3780                  */
3781                 str++;
3782                 name = xmlParseStringName(ctxt, &str);
3783 
3784                 if ((name == NULL) || (*str++ != ';')) {
3785                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3786                             "EntityValue: '&' forbidden except for entities "
3787                             "references\n");
3788                     xmlFree(name);
3789                     return;
3790                 }
3791 
3792                 xmlFree(name);
3793             }
3794         } else if (c == '%') {
3795             xmlEntityPtr ent;
3796 
3797             if (chunk < str)
3798                 xmlSBufAddString(buf, chunk, str - chunk);
3799 
3800             ent = xmlParseStringPEReference(ctxt, &str);
3801             if (ent == NULL)
3802                 return;
3803 
3804             if (!PARSER_EXTERNAL(ctxt)) {
3805                 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3806                 return;
3807             }
3808 
3809             if (ent->content == NULL) {
3810                 /*
3811                  * Note: external parsed entities will not be loaded,
3812                  * it is not required for a non-validating parser to
3813                  * complete external PEReferences coming from the
3814                  * internal subset
3815                  */
3816                 if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3817                     ((ctxt->replaceEntities) ||
3818                      (ctxt->validate))) {
3819                     xmlLoadEntityContent(ctxt, ent);
3820                 } else {
3821                     xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3822                                   "not validating will not read content for "
3823                                   "PE entity %s\n", ent->name, NULL);
3824                 }
3825             }
3826 
3827             /*
3828              * TODO: Skip if ent->content is still NULL.
3829              */
3830 
3831             if (xmlParserEntityCheck(ctxt, ent->length))
3832                 return;
3833 
3834             if (ent->flags & XML_ENT_EXPANDING) {
3835                 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3836                 xmlHaltParser(ctxt);
3837                 return;
3838             }
3839 
3840             ent->flags |= XML_ENT_EXPANDING;
3841             xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3842                                       depth);
3843             ent->flags &= ~XML_ENT_EXPANDING;
3844 
3845             chunk = str;
3846         } else {
3847             /* Normal ASCII char */
3848             if (!IS_BYTE_CHAR(c)) {
3849                 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3850                         "invalid character in entity value\n");
3851                 if (chunk < str)
3852                     xmlSBufAddString(buf, chunk, str - chunk);
3853                 xmlSBufAddReplChar(buf);
3854                 str += 1;
3855                 chunk = str;
3856             } else {
3857                 str += 1;
3858             }
3859         }
3860     }
3861 
3862     if (chunk < str)
3863         xmlSBufAddString(buf, chunk, str - chunk);
3864 
3865     return;
3866 }
3867 
3868 /**
3869  * xmlParseEntityValue:
3870  * @ctxt:  an XML parser context
3871  * @orig:  if non-NULL store a copy of the original entity value
3872  *
3873  * DEPRECATED: Internal function, don't use.
3874  *
3875  * parse a value for ENTITY declarations
3876  *
3877  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3878  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3879  *
3880  * Returns the EntityValue parsed with reference substituted or NULL
3881  */
3882 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3883 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3884     unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3885                          XML_MAX_HUGE_LENGTH :
3886                          XML_MAX_TEXT_LENGTH;
3887     xmlSBuf buf;
3888     const xmlChar *start;
3889     int quote, length;
3890 
3891     xmlSBufInit(&buf, maxLength);
3892 
3893     GROW;
3894 
3895     quote = CUR;
3896     if ((quote != '"') && (quote != '\'')) {
3897 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898 	return(NULL);
3899     }
3900     CUR_PTR++;
3901 
3902     length = 0;
3903 
3904     /*
3905      * Copy raw content of the entity into a buffer
3906      */
3907     while (1) {
3908         int c;
3909 
3910         if (PARSER_STOPPED(ctxt))
3911             goto error;
3912 
3913         if (CUR_PTR >= ctxt->input->end) {
3914             xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3915             goto error;
3916         }
3917 
3918         c = CUR;
3919 
3920         if (c == 0) {
3921             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3922                     "invalid character in entity value\n");
3923             goto error;
3924         }
3925         if (c == quote)
3926             break;
3927         NEXTL(1);
3928         length += 1;
3929 
3930         /*
3931          * TODO: Check growth threshold
3932          */
3933         if (ctxt->input->end - CUR_PTR < 10)
3934             GROW;
3935     }
3936 
3937     start = CUR_PTR - length;
3938 
3939     if (orig != NULL) {
3940         *orig = xmlStrndup(start, length);
3941         if (*orig == NULL)
3942             xmlErrMemory(ctxt);
3943     }
3944 
3945     xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3946 
3947     NEXTL(1);
3948 
3949     return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3950 
3951 error:
3952     xmlSBufCleanup(&buf, ctxt, "entity length too long");
3953     return(NULL);
3954 }
3955 
3956 /**
3957  * xmlCheckEntityInAttValue:
3958  * @ctxt:  parser context
3959  * @pent:  entity
3960  * @depth:  nesting depth
3961  *
3962  * Check an entity reference in an attribute value for validity
3963  * without expanding it.
3964  */
3965 static void
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt,xmlEntityPtr pent,int depth)3966 xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3967     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3968     const xmlChar *str;
3969     unsigned long expandedSize = pent->length;
3970     int c, flags;
3971 
3972     depth += 1;
3973     if (depth > maxDepth) {
3974 	xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3975                        "Maximum entity nesting depth exceeded");
3976 	return;
3977     }
3978 
3979     if (pent->flags & XML_ENT_EXPANDING) {
3980         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3981         xmlHaltParser(ctxt);
3982         return;
3983     }
3984 
3985     /*
3986      * If we're parsing a default attribute value in DTD content,
3987      * the entity might reference other entities which weren't
3988      * defined yet, so the check isn't reliable.
3989      */
3990     if (ctxt->inSubset == 0)
3991         flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3992     else
3993         flags = XML_ENT_VALIDATED;
3994 
3995     str = pent->content;
3996     if (str == NULL)
3997         goto done;
3998 
3999     /*
4000      * Note that entity values are already validated. We only check
4001      * for illegal less-than signs and compute the expanded size
4002      * of the entity. No special handling for multi-byte characters
4003      * is needed.
4004      */
4005     while (!PARSER_STOPPED(ctxt)) {
4006         c = *str;
4007 
4008 	if (c != '&') {
4009             if (c == 0)
4010                 break;
4011 
4012             if (c == '<')
4013                 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4014                         "'<' in entity '%s' is not allowed in attributes "
4015                         "values\n", pent->name);
4016 
4017             str += 1;
4018         } else if (str[1] == '#') {
4019             int val;
4020 
4021 	    val = xmlParseStringCharRef(ctxt, &str);
4022 	    if (val == 0) {
4023                 pent->content[0] = 0;
4024                 break;
4025             }
4026 	} else {
4027             xmlChar *name;
4028             xmlEntityPtr ent;
4029 
4030 	    name = xmlParseStringEntityRef(ctxt, &str);
4031 	    if (name == NULL) {
4032                 pent->content[0] = 0;
4033                 break;
4034             }
4035 
4036             ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4037             xmlFree(name);
4038 
4039             if ((ent != NULL) &&
4040                 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4041                 if ((ent->flags & flags) != flags) {
4042                     pent->flags |= XML_ENT_EXPANDING;
4043                     xmlCheckEntityInAttValue(ctxt, ent, depth);
4044                     pent->flags &= ~XML_ENT_EXPANDING;
4045                 }
4046 
4047                 xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4048                 xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4049             }
4050         }
4051     }
4052 
4053 done:
4054     if (ctxt->inSubset == 0)
4055         pent->expandedSize = expandedSize;
4056 
4057     pent->flags |= flags;
4058 }
4059 
4060 /**
4061  * xmlExpandEntityInAttValue:
4062  * @ctxt:  parser context
4063  * @buf:  string buffer
4064  * @str:  entity or attribute value
4065  * @pent:  entity for entity value, NULL for attribute values
4066  * @normalize:  whether to collapse whitespace
4067  * @inSpace:  whitespace state
4068  * @depth:  nesting depth
4069  * @check:  whether to check for amplification
4070  *
4071  * Expand general entity references in an entity or attribute value.
4072  * Perform attribute value normalization.
4073  */
4074 static void
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,xmlEntityPtr pent,int normalize,int * inSpace,int depth,int check)4075 xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4076                           const xmlChar *str, xmlEntityPtr pent, int normalize,
4077                           int *inSpace, int depth, int check) {
4078     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4079     int c, chunkSize;
4080 
4081     if (str == NULL)
4082         return;
4083 
4084     depth += 1;
4085     if (depth > maxDepth) {
4086 	xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4087                        "Maximum entity nesting depth exceeded");
4088 	return;
4089     }
4090 
4091     if (pent != NULL) {
4092         if (pent->flags & XML_ENT_EXPANDING) {
4093             xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4094             xmlHaltParser(ctxt);
4095             return;
4096         }
4097 
4098         if (check) {
4099             if (xmlParserEntityCheck(ctxt, pent->length))
4100                 return;
4101         }
4102     }
4103 
4104     chunkSize = 0;
4105 
4106     /*
4107      * Note that entity values are already validated. No special
4108      * handling for multi-byte characters is needed.
4109      */
4110     while (!PARSER_STOPPED(ctxt)) {
4111         c = *str;
4112 
4113 	if (c != '&') {
4114             if (c == 0)
4115                 break;
4116 
4117             /*
4118              * If this function is called without an entity, it is used to
4119              * expand entities in an attribute content where less-than was
4120              * already unscaped and is allowed.
4121              */
4122             if ((pent != NULL) && (c == '<')) {
4123                 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4124                         "'<' in entity '%s' is not allowed in attributes "
4125                         "values\n", pent->name);
4126                 break;
4127             }
4128 
4129             if (c <= 0x20) {
4130                 if ((normalize) && (*inSpace)) {
4131                     /* Skip char */
4132                     if (chunkSize > 0) {
4133                         xmlSBufAddString(buf, str - chunkSize, chunkSize);
4134                         chunkSize = 0;
4135                     }
4136                 } else if (c < 0x20) {
4137                     if (chunkSize > 0) {
4138                         xmlSBufAddString(buf, str - chunkSize, chunkSize);
4139                         chunkSize = 0;
4140                     }
4141 
4142                     xmlSBufAddCString(buf, " ", 1);
4143                 } else {
4144                     chunkSize += 1;
4145                 }
4146 
4147                 *inSpace = 1;
4148             } else {
4149                 chunkSize += 1;
4150                 *inSpace = 0;
4151             }
4152 
4153             str += 1;
4154         } else if (str[1] == '#') {
4155             int val;
4156 
4157             if (chunkSize > 0) {
4158                 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4159                 chunkSize = 0;
4160             }
4161 
4162 	    val = xmlParseStringCharRef(ctxt, &str);
4163 	    if (val == 0) {
4164                 if (pent != NULL)
4165                     pent->content[0] = 0;
4166                 break;
4167             }
4168 
4169             if (val == ' ') {
4170                 if ((!normalize) || (!*inSpace))
4171                     xmlSBufAddCString(buf, " ", 1);
4172                 *inSpace = 1;
4173             } else {
4174                 xmlSBufAddChar(buf, val);
4175                 *inSpace = 0;
4176             }
4177 	} else {
4178             xmlChar *name;
4179             xmlEntityPtr ent;
4180 
4181             if (chunkSize > 0) {
4182                 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4183                 chunkSize = 0;
4184             }
4185 
4186 	    name = xmlParseStringEntityRef(ctxt, &str);
4187             if (name == NULL) {
4188                 if (pent != NULL)
4189                     pent->content[0] = 0;
4190                 break;
4191             }
4192 
4193             ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4194             xmlFree(name);
4195 
4196 	    if ((ent != NULL) &&
4197 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4198 		if (ent->content == NULL) {
4199 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4200 			    "predefined entity has no content\n");
4201                     break;
4202                 }
4203 
4204                 xmlSBufAddString(buf, ent->content, ent->length);
4205 
4206                 *inSpace = 0;
4207 	    } else if ((ent != NULL) && (ent->content != NULL)) {
4208                 if (pent != NULL)
4209                     pent->flags |= XML_ENT_EXPANDING;
4210 		xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4211                                           normalize, inSpace, depth, check);
4212                 if (pent != NULL)
4213                     pent->flags &= ~XML_ENT_EXPANDING;
4214 	    }
4215         }
4216     }
4217 
4218     if (chunkSize > 0)
4219         xmlSBufAddString(buf, str - chunkSize, chunkSize);
4220 
4221     return;
4222 }
4223 
4224 /**
4225  * xmlExpandEntitiesInAttValue:
4226  * @ctxt:  parser context
4227  * @str:  entity or attribute value
4228  * @normalize:  whether to collapse whitespace
4229  *
4230  * Expand general entity references in an entity or attribute value.
4231  * Perform attribute value normalization.
4232  *
4233  * Returns the expanded attribtue value.
4234  */
4235 xmlChar *
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt,const xmlChar * str,int normalize)4236 xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4237                             int normalize) {
4238     unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4239                          XML_MAX_HUGE_LENGTH :
4240                          XML_MAX_TEXT_LENGTH;
4241     xmlSBuf buf;
4242     int inSpace = 1;
4243 
4244     xmlSBufInit(&buf, maxLength);
4245 
4246     xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4247                               ctxt->inputNr, /* check */ 0);
4248 
4249     if ((normalize) && (inSpace) && (buf.size > 0))
4250         buf.size--;
4251 
4252     return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4253 }
4254 
4255 /**
4256  * xmlParseAttValueInternal:
4257  * @ctxt:  an XML parser context
4258  * @len:  attribute len result
4259  * @alloc:  whether the attribute was reallocated as a new string
4260  * @normalize:  if 1 then further non-CDATA normalization must be done
4261  *
4262  * parse a value for an attribute.
4263  * NOTE: if no normalization is needed, the routine will return pointers
4264  *       directly from the data buffer.
4265  *
4266  * 3.3.3 Attribute-Value Normalization:
4267  * Before the value of an attribute is passed to the application or
4268  * checked for validity, the XML processor must normalize it as follows:
4269  * - a character reference is processed by appending the referenced
4270  *   character to the attribute value
4271  * - an entity reference is processed by recursively processing the
4272  *   replacement text of the entity
4273  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274  *   appending #x20 to the normalized value, except that only a single
4275  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4276  *   parsed entity or the literal entity value of an internal parsed entity
4277  * - other characters are processed by appending them to the normalized value
4278  * If the declared value is not CDATA, then the XML processor must further
4279  * process the normalized attribute value by discarding any leading and
4280  * trailing space (#x20) characters, and by replacing sequences of space
4281  * (#x20) characters by a single space (#x20) character.
4282  * All attributes for which no declaration has been read should be treated
4283  * by a non-validating parser as if declared CDATA.
4284  *
4285  * Returns the AttValue parsed or NULL. The value has to be freed by the
4286  *     caller if it was copied, this can be detected by val[*len] == 0.
4287  */
4288 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * attlen,int * alloc,int normalize)4289 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4290                          int normalize) {
4291     unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4292                          XML_MAX_HUGE_LENGTH :
4293                          XML_MAX_TEXT_LENGTH;
4294     xmlSBuf buf;
4295     xmlChar *ret;
4296     int c, l, quote, flags, chunkSize;
4297     int inSpace = 1;
4298 
4299     xmlSBufInit(&buf, maxLength);
4300 
4301     GROW;
4302 
4303     quote = CUR;
4304     if ((quote != '"') && (quote != '\'')) {
4305 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306 	return(NULL);
4307     }
4308     NEXTL(1);
4309 
4310     if (ctxt->inSubset == 0)
4311         flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312     else
4313         flags = XML_ENT_VALIDATED;
4314 
4315     inSpace = 1;
4316     chunkSize = 0;
4317 
4318     while (1) {
4319         if (PARSER_STOPPED(ctxt))
4320             goto error;
4321 
4322         if (CUR_PTR >= ctxt->input->end) {
4323             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324                            "AttValue: ' expected\n");
4325             goto error;
4326         }
4327 
4328         /*
4329          * TODO: Check growth threshold
4330          */
4331         if (ctxt->input->end - CUR_PTR < 10)
4332             GROW;
4333 
4334         c = CUR;
4335 
4336         if (c >= 0x80) {
4337             l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338                     "invalid character in attribute value\n");
4339             if (l == 0) {
4340                 if (chunkSize > 0) {
4341                     xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342                     chunkSize = 0;
4343                 }
4344                 xmlSBufAddReplChar(&buf);
4345                 NEXTL(1);
4346             } else {
4347                 chunkSize += l;
4348                 NEXTL(l);
4349             }
4350 
4351             inSpace = 0;
4352         } else if (c != '&') {
4353             if (c > 0x20) {
4354                 if (c == quote)
4355                     break;
4356 
4357                 if (c == '<')
4358                     xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359 
4360                 chunkSize += 1;
4361                 inSpace = 0;
4362             } else if (!IS_BYTE_CHAR(c)) {
4363                 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364                         "invalid character in attribute value\n");
4365                 if (chunkSize > 0) {
4366                     xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367                     chunkSize = 0;
4368                 }
4369                 xmlSBufAddReplChar(&buf);
4370                 inSpace = 0;
4371             } else {
4372                 /* Whitespace */
4373                 if ((normalize) && (inSpace)) {
4374                     /* Skip char */
4375                     if (chunkSize > 0) {
4376                         xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377                         chunkSize = 0;
4378                     }
4379                 } else if (c < 0x20) {
4380                     /* Convert to space */
4381                     if (chunkSize > 0) {
4382                         xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383                         chunkSize = 0;
4384                     }
4385 
4386                     xmlSBufAddCString(&buf, " ", 1);
4387                 } else {
4388                     chunkSize += 1;
4389                 }
4390 
4391                 inSpace = 1;
4392 
4393                 if ((c == 0xD) && (NXT(1) == 0xA))
4394                     CUR_PTR++;
4395             }
4396 
4397             NEXTL(1);
4398         } else if (NXT(1) == '#') {
4399             int val;
4400 
4401             if (chunkSize > 0) {
4402                 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403                 chunkSize = 0;
4404             }
4405 
4406             val = xmlParseCharRef(ctxt);
4407             if (val == 0)
4408                 goto error;
4409 
4410             if ((val == '&') && (!ctxt->replaceEntities)) {
4411                 /*
4412                  * The reparsing will be done in xmlStringGetNodeList()
4413                  * called by the attribute() function in SAX.c
4414                  */
4415                 xmlSBufAddCString(&buf, "&#38;", 5);
4416                 inSpace = 0;
4417             } else if (val == ' ') {
4418                 if ((!normalize) || (!inSpace))
4419                     xmlSBufAddCString(&buf, " ", 1);
4420                 inSpace = 1;
4421             } else {
4422                 xmlSBufAddChar(&buf, val);
4423                 inSpace = 0;
4424             }
4425         } else {
4426             const xmlChar *name;
4427             xmlEntityPtr ent;
4428 
4429             if (chunkSize > 0) {
4430                 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431                 chunkSize = 0;
4432             }
4433 
4434             name = xmlParseEntityRefInternal(ctxt);
4435             if (name == NULL) {
4436                 /*
4437                  * Probably a literal '&' which wasn't escaped.
4438                  * TODO: Handle gracefully in recovery mode.
4439                  */
4440                 continue;
4441             }
4442 
4443             ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444             if (ent == NULL)
4445                 continue;
4446 
4447             if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448                 if ((ent->content[0] == '&') && (!ctxt->replaceEntities))
4449                     xmlSBufAddCString(&buf, "&#38;", 5);
4450                 else
4451                     xmlSBufAddString(&buf, ent->content, ent->length);
4452                 inSpace = 0;
4453             } else if (ctxt->replaceEntities) {
4454                 xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455                                           normalize, &inSpace, ctxt->inputNr,
4456                                           /* check */ 1);
4457             } else {
4458                 if ((ent->flags & flags) != flags)
4459                     xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460 
4461                 if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462                     ent->content[0] = 0;
4463                     goto error;
4464                 }
4465 
4466                 /*
4467                  * Just output the reference
4468                  */
4469                 xmlSBufAddCString(&buf, "&", 1);
4470                 xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471                 xmlSBufAddCString(&buf, ";", 1);
4472 
4473                 inSpace = 0;
4474             }
4475 	}
4476     }
4477 
4478     if ((buf.mem == NULL) && (alloc != NULL)) {
4479         ret = (xmlChar *) CUR_PTR - chunkSize;
4480 
4481         if (attlen != NULL)
4482             *attlen = chunkSize;
4483         if ((normalize) && (inSpace) && (chunkSize > 0))
4484             *attlen -= 1;
4485         *alloc = 0;
4486 
4487         /* Report potential error */
4488         xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489     } else {
4490         if (chunkSize > 0)
4491             xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492 
4493         if ((normalize) && (inSpace) && (buf.size > 0))
4494             buf.size--;
4495 
4496         ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497 
4498         if (ret != NULL) {
4499             if (attlen != NULL)
4500                 *attlen = buf.size;
4501             if (alloc != NULL)
4502                 *alloc = 1;
4503         }
4504     }
4505 
4506     NEXTL(1);
4507 
4508     return(ret);
4509 
4510 error:
4511     xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512     return(NULL);
4513 }
4514 
4515 /**
4516  * xmlParseAttValue:
4517  * @ctxt:  an XML parser context
4518  *
4519  * DEPRECATED: Internal function, don't use.
4520  *
4521  * parse a value for an attribute
4522  * Note: the parser won't do substitution of entities here, this
4523  * will be handled later in xmlStringGetNodeList
4524  *
4525  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526  *                   "'" ([^<&'] | Reference)* "'"
4527  *
4528  * 3.3.3 Attribute-Value Normalization:
4529  * Before the value of an attribute is passed to the application or
4530  * checked for validity, the XML processor must normalize it as follows:
4531  * - a character reference is processed by appending the referenced
4532  *   character to the attribute value
4533  * - an entity reference is processed by recursively processing the
4534  *   replacement text of the entity
4535  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536  *   appending #x20 to the normalized value, except that only a single
4537  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4538  *   parsed entity or the literal entity value of an internal parsed entity
4539  * - other characters are processed by appending them to the normalized value
4540  * If the declared value is not CDATA, then the XML processor must further
4541  * process the normalized attribute value by discarding any leading and
4542  * trailing space (#x20) characters, and by replacing sequences of space
4543  * (#x20) characters by a single space (#x20) character.
4544  * All attributes for which no declaration has been read should be treated
4545  * by a non-validating parser as if declared CDATA.
4546  *
4547  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548  */
4549 
4550 
4551 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4552 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4553     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4554     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4555 }
4556 
4557 /**
4558  * xmlParseSystemLiteral:
4559  * @ctxt:  an XML parser context
4560  *
4561  * DEPRECATED: Internal function, don't use.
4562  *
4563  * parse an XML Literal
4564  *
4565  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566  *
4567  * Returns the SystemLiteral parsed or NULL
4568  */
4569 
4570 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4571 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4572     xmlChar *buf = NULL;
4573     int len = 0;
4574     int size = XML_PARSER_BUFFER_SIZE;
4575     int cur, l;
4576     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577                     XML_MAX_TEXT_LENGTH :
4578                     XML_MAX_NAME_LENGTH;
4579     xmlChar stop;
4580 
4581     if (RAW == '"') {
4582         NEXT;
4583 	stop = '"';
4584     } else if (RAW == '\'') {
4585         NEXT;
4586 	stop = '\'';
4587     } else {
4588 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589 	return(NULL);
4590     }
4591 
4592     buf = (xmlChar *) xmlMallocAtomic(size);
4593     if (buf == NULL) {
4594         xmlErrMemory(ctxt);
4595 	return(NULL);
4596     }
4597     cur = CUR_CHAR(l);
4598     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4599 	if (len + 5 >= size) {
4600 	    xmlChar *tmp;
4601 
4602 	    size *= 2;
4603 	    tmp = (xmlChar *) xmlRealloc(buf, size);
4604 	    if (tmp == NULL) {
4605 	        xmlFree(buf);
4606 		xmlErrMemory(ctxt);
4607 		return(NULL);
4608 	    }
4609 	    buf = tmp;
4610 	}
4611 	COPY_BUF(buf, len, cur);
4612         if (len > maxLength) {
4613             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614             xmlFree(buf);
4615             return(NULL);
4616         }
4617 	NEXTL(l);
4618 	cur = CUR_CHAR(l);
4619     }
4620     buf[len] = 0;
4621     if (!IS_CHAR(cur)) {
4622 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623     } else {
4624 	NEXT;
4625     }
4626     return(buf);
4627 }
4628 
4629 /**
4630  * xmlParsePubidLiteral:
4631  * @ctxt:  an XML parser context
4632  *
4633  * DEPRECATED: Internal function, don't use.
4634  *
4635  * parse an XML public literal
4636  *
4637  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638  *
4639  * Returns the PubidLiteral parsed or NULL.
4640  */
4641 
4642 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4643 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4644     xmlChar *buf = NULL;
4645     int len = 0;
4646     int size = XML_PARSER_BUFFER_SIZE;
4647     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648                     XML_MAX_TEXT_LENGTH :
4649                     XML_MAX_NAME_LENGTH;
4650     xmlChar cur;
4651     xmlChar stop;
4652 
4653     if (RAW == '"') {
4654         NEXT;
4655 	stop = '"';
4656     } else if (RAW == '\'') {
4657         NEXT;
4658 	stop = '\'';
4659     } else {
4660 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661 	return(NULL);
4662     }
4663     buf = (xmlChar *) xmlMallocAtomic(size);
4664     if (buf == NULL) {
4665 	xmlErrMemory(ctxt);
4666 	return(NULL);
4667     }
4668     cur = CUR;
4669     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670            (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4671 	if (len + 1 >= size) {
4672 	    xmlChar *tmp;
4673 
4674 	    size *= 2;
4675 	    tmp = (xmlChar *) xmlRealloc(buf, size);
4676 	    if (tmp == NULL) {
4677 		xmlErrMemory(ctxt);
4678 		xmlFree(buf);
4679 		return(NULL);
4680 	    }
4681 	    buf = tmp;
4682 	}
4683 	buf[len++] = cur;
4684         if (len > maxLength) {
4685             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686             xmlFree(buf);
4687             return(NULL);
4688         }
4689 	NEXT;
4690 	cur = CUR;
4691     }
4692     buf[len] = 0;
4693     if (cur != stop) {
4694 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695     } else {
4696 	NEXTL(1);
4697     }
4698     return(buf);
4699 }
4700 
4701 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702 
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing. An entry is non-zero (and equal to its own index) for every
 * byte the inner loop may step over. It is zero for the bytes that
 * stop the loop: control characters other than TAB (this includes CR
 * and LF, which are handled separately), '&', '<', ']' and every
 * non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to implicit zero initialization */
};
4740 
4741 /**
4742  * xmlParseCharDataInternal:
4743  * @ctxt:  an XML parser context
4744  * @partial:  buffer may contain partial UTF-8 sequences
4745  *
4746  * Parse character data. Always makes progress if the first char isn't
4747  * '<' or '&'.
4748  *
4749  * The right angle bracket (>) may be represented using the string "&gt;",
4750  * and must, for compatibility, be escaped using "&gt;" or a character
4751  * reference when it appears in the string "]]>" in content, when that
4752  * string is not marking the end of a CDATA section.
4753  *
4754  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4755  */
4756 static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt,int partial)4757 xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4758     const xmlChar *in;
4759     int nbchar = 0;
4760     int line = ctxt->input->line;
4761     int col = ctxt->input->col;
4762     int ccol;
4763 
4764     GROW;
4765     /*
4766      * Accelerated common case where input don't need to be
4767      * modified before passing it to the handler.
4768      */
4769     in = ctxt->input->cur;
4770     do {
4771 get_more_space:
4772         while (*in == 0x20) { in++; ctxt->input->col++; }
4773         if (*in == 0xA) {
4774             do {
4775                 ctxt->input->line++; ctxt->input->col = 1;
4776                 in++;
4777             } while (*in == 0xA);
4778             goto get_more_space;
4779         }
4780         if (*in == '<') {
4781             nbchar = in - ctxt->input->cur;
4782             if (nbchar > 0) {
4783                 const xmlChar *tmp = ctxt->input->cur;
4784                 ctxt->input->cur = in;
4785 
4786                 if ((ctxt->sax != NULL) &&
4787                     (ctxt->disableSAX == 0) &&
4788                     (ctxt->sax->ignorableWhitespace !=
4789                      ctxt->sax->characters)) {
4790                     if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791                         if (ctxt->sax->ignorableWhitespace != NULL)
4792                             ctxt->sax->ignorableWhitespace(ctxt->userData,
4793                                                    tmp, nbchar);
4794                     } else {
4795                         if (ctxt->sax->characters != NULL)
4796                             ctxt->sax->characters(ctxt->userData,
4797                                                   tmp, nbchar);
4798                         if (*ctxt->space == -1)
4799                             *ctxt->space = -2;
4800                     }
4801                 } else if ((ctxt->sax != NULL) &&
4802                            (ctxt->disableSAX == 0) &&
4803                            (ctxt->sax->characters != NULL)) {
4804                     ctxt->sax->characters(ctxt->userData,
4805                                           tmp, nbchar);
4806                 }
4807             }
4808             return;
4809         }
4810 
4811 get_more:
4812         ccol = ctxt->input->col;
4813         while (test_char_data[*in]) {
4814             in++;
4815             ccol++;
4816         }
4817         ctxt->input->col = ccol;
4818         if (*in == 0xA) {
4819             do {
4820                 ctxt->input->line++; ctxt->input->col = 1;
4821                 in++;
4822             } while (*in == 0xA);
4823             goto get_more;
4824         }
4825         if (*in == ']') {
4826             if ((in[1] == ']') && (in[2] == '>')) {
4827                 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828                 ctxt->input->cur = in + 1;
4829                 return;
4830             }
4831             in++;
4832             ctxt->input->col++;
4833             goto get_more;
4834         }
4835         nbchar = in - ctxt->input->cur;
4836         if (nbchar > 0) {
4837             if ((ctxt->sax != NULL) &&
4838                 (ctxt->disableSAX == 0) &&
4839                 (ctxt->sax->ignorableWhitespace !=
4840                  ctxt->sax->characters) &&
4841                 (IS_BLANK_CH(*ctxt->input->cur))) {
4842                 const xmlChar *tmp = ctxt->input->cur;
4843                 ctxt->input->cur = in;
4844 
4845                 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846                     if (ctxt->sax->ignorableWhitespace != NULL)
4847                         ctxt->sax->ignorableWhitespace(ctxt->userData,
4848                                                        tmp, nbchar);
4849                 } else {
4850                     if (ctxt->sax->characters != NULL)
4851                         ctxt->sax->characters(ctxt->userData,
4852                                               tmp, nbchar);
4853                     if (*ctxt->space == -1)
4854                         *ctxt->space = -2;
4855                 }
4856                 line = ctxt->input->line;
4857                 col = ctxt->input->col;
4858             } else if ((ctxt->sax != NULL) &&
4859                        (ctxt->disableSAX == 0)) {
4860                 if (ctxt->sax->characters != NULL)
4861                     ctxt->sax->characters(ctxt->userData,
4862                                           ctxt->input->cur, nbchar);
4863                 line = ctxt->input->line;
4864                 col = ctxt->input->col;
4865             }
4866         }
4867         ctxt->input->cur = in;
4868         if (*in == 0xD) {
4869             in++;
4870             if (*in == 0xA) {
4871                 ctxt->input->cur = in;
4872                 in++;
4873                 ctxt->input->line++; ctxt->input->col = 1;
4874                 continue; /* while */
4875             }
4876             in--;
4877         }
4878         if (*in == '<') {
4879             return;
4880         }
4881         if (*in == '&') {
4882             return;
4883         }
4884         SHRINK;
4885         GROW;
4886         in = ctxt->input->cur;
4887     } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888              (*in == 0x09) || (*in == 0x0a));
4889     ctxt->input->line = line;
4890     ctxt->input->col = col;
4891     xmlParseCharDataComplex(ctxt, partial);
4892 }
4893 
4894 /**
4895  * xmlParseCharDataComplex:
4896  * @ctxt:  an XML parser context
4897  * @cdata:  int indicating whether we are within a CDATA section
4898  *
4899  * Always makes progress if the first char isn't '<' or '&'.
4900  *
4901  * parse a CharData section.this is the fallback function
4902  * of xmlParseCharData() when the parsing requires handling
4903  * of non-ASCII characters.
4904  */
4905 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int partial)4906 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4907     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908     int nbchar = 0;
4909     int cur, l;
4910 
4911     cur = CUR_CHAR(l);
4912     while ((cur != '<') && /* checked */
4913            (cur != '&') &&
4914 	   (IS_CHAR(cur))) {
4915 	if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916 	    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917 	}
4918 	COPY_BUF(buf, nbchar, cur);
4919 	/* move current position before possible calling of ctxt->sax->characters */
4920 	NEXTL(l);
4921 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922 	    buf[nbchar] = 0;
4923 
4924 	    /*
4925 	     * OK the segment is to be consumed as chars.
4926 	     */
4927 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4929 		    if (ctxt->sax->ignorableWhitespace != NULL)
4930 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4931 			                               buf, nbchar);
4932 		} else {
4933 		    if (ctxt->sax->characters != NULL)
4934 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935 		    if ((ctxt->sax->characters !=
4936 		         ctxt->sax->ignorableWhitespace) &&
4937 			(*ctxt->space == -1))
4938 			*ctxt->space = -2;
4939 		}
4940 	    }
4941 	    nbchar = 0;
4942             SHRINK;
4943 	}
4944 	cur = CUR_CHAR(l);
4945     }
4946     if (nbchar != 0) {
4947         buf[nbchar] = 0;
4948 	/*
4949 	 * OK the segment is to be consumed as chars.
4950 	 */
4951 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4953 		if (ctxt->sax->ignorableWhitespace != NULL)
4954 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955 	    } else {
4956 		if (ctxt->sax->characters != NULL)
4957 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959 		    (*ctxt->space == -1))
4960 		    *ctxt->space = -2;
4961 	    }
4962 	}
4963     }
4964     /*
4965      * cur == 0 can mean
4966      *
4967      * - End of buffer.
4968      * - An actual 0 character.
4969      * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970      */
4971     if (ctxt->input->cur < ctxt->input->end) {
4972         if ((cur == 0) && (CUR != 0)) {
4973             if (partial == 0) {
4974                 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975                         "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976                 NEXTL(1);
4977             }
4978         } else if ((cur != '<') && (cur != '&')) {
4979             /* Generate the error and skip the offending character */
4980             xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981                               "PCDATA invalid Char value %d\n", cur);
4982             NEXTL(l);
4983         }
4984     }
4985 }
4986 
4987 /**
4988  * xmlParseCharData:
4989  * @ctxt:  an XML parser context
4990  * @cdata:  unused
4991  *
4992  * DEPRECATED: Internal function, don't use.
4993  */
4994 void
xmlParseCharData(xmlParserCtxtPtr ctxt,ATTRIBUTE_UNUSED int cdata)4995 xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4996     xmlParseCharDataInternal(ctxt, 0);
4997 }
4998 
4999 /**
5000  * xmlParseExternalID:
5001  * @ctxt:  an XML parser context
5002  * @publicID:  a xmlChar** receiving PubidLiteral
5003  * @strict: indicate whether we should restrict parsing to only
5004  *          production [75], see NOTE below
5005  *
5006  * DEPRECATED: Internal function, don't use.
5007  *
5008  * Parse an External ID or a Public ID
5009  *
5010  * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011  *       'PUBLIC' S PubidLiteral S SystemLiteral
5012  *
5013  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5015  *
5016  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017  *
5018  * Returns the function returns SystemLiteral and in the second
5019  *                case publicID receives PubidLiteral, is strict is off
5020  *                it is possible to return NULL and have publicID set.
5021  */
5022 
5023 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)5024 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5025     xmlChar *URI = NULL;
5026 
5027     *publicID = NULL;
5028     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029         SKIP(6);
5030 	if (SKIP_BLANKS == 0) {
5031 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032 	                   "Space required after 'SYSTEM'\n");
5033 	}
5034 	URI = xmlParseSystemLiteral(ctxt);
5035 	if (URI == NULL) {
5036 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037         }
5038     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039         SKIP(6);
5040 	if (SKIP_BLANKS == 0) {
5041 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042 		    "Space required after 'PUBLIC'\n");
5043 	}
5044 	*publicID = xmlParsePubidLiteral(ctxt);
5045 	if (*publicID == NULL) {
5046 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047 	}
5048 	if (strict) {
5049 	    /*
5050 	     * We don't handle [83] so "S SystemLiteral" is required.
5051 	     */
5052 	    if (SKIP_BLANKS == 0) {
5053 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054 			"Space required after the Public Identifier\n");
5055 	    }
5056 	} else {
5057 	    /*
5058 	     * We handle [83] so we return immediately, if
5059 	     * "S SystemLiteral" is not detected. We skip blanks if no
5060              * system literal was found, but this is harmless since we must
5061              * be at the end of a NotationDecl.
5062 	     */
5063 	    if (SKIP_BLANKS == 0) return(NULL);
5064 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065 	}
5066 	URI = xmlParseSystemLiteral(ctxt);
5067 	if (URI == NULL) {
5068 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069         }
5070     }
5071     return(URI);
5072 }
5073 
5074 /**
5075  * xmlParseCommentComplex:
5076  * @ctxt:  an XML parser context
5077  * @buf:  the already parsed part of the buffer
5078  * @len:  number of bytes in the buffer
5079  * @size:  allocated size of the buffer
5080  *
5081  * Skip an XML (SGML) comment <!-- .... -->
5082  *  The spec says that "For compatibility, the string "--" (double-hyphen)
5083  *  must not occur within comments. "
5084  * This is the slow routine in case the accelerator for ascii didn't work
5085  *
5086  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087  */
5088 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)5089 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090                        size_t len, size_t size) {
5091     int q, ql;
5092     int r, rl;
5093     int cur, l;
5094     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095                        XML_MAX_HUGE_LENGTH :
5096                        XML_MAX_TEXT_LENGTH;
5097 
5098     if (buf == NULL) {
5099         len = 0;
5100 	size = XML_PARSER_BUFFER_SIZE;
5101 	buf = (xmlChar *) xmlMallocAtomic(size);
5102 	if (buf == NULL) {
5103 	    xmlErrMemory(ctxt);
5104 	    return;
5105 	}
5106     }
5107     q = CUR_CHAR(ql);
5108     if (q == 0)
5109         goto not_terminated;
5110     if (!IS_CHAR(q)) {
5111         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112                           "xmlParseComment: invalid xmlChar value %d\n",
5113 	                  q);
5114 	xmlFree (buf);
5115 	return;
5116     }
5117     NEXTL(ql);
5118     r = CUR_CHAR(rl);
5119     if (r == 0)
5120         goto not_terminated;
5121     if (!IS_CHAR(r)) {
5122         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123                           "xmlParseComment: invalid xmlChar value %d\n",
5124 	                  r);
5125 	xmlFree (buf);
5126 	return;
5127     }
5128     NEXTL(rl);
5129     cur = CUR_CHAR(l);
5130     if (cur == 0)
5131         goto not_terminated;
5132     while (IS_CHAR(cur) && /* checked */
5133            ((cur != '>') ||
5134 	    (r != '-') || (q != '-'))) {
5135 	if ((r == '-') && (q == '-')) {
5136 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137 	}
5138 	if (len + 5 >= size) {
5139 	    xmlChar *new_buf;
5140             size_t new_size;
5141 
5142 	    new_size = size * 2;
5143 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144 	    if (new_buf == NULL) {
5145 		xmlFree (buf);
5146 		xmlErrMemory(ctxt);
5147 		return;
5148 	    }
5149 	    buf = new_buf;
5150             size = new_size;
5151 	}
5152 	COPY_BUF(buf, len, q);
5153         if (len > maxLength) {
5154             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155                          "Comment too big found", NULL);
5156             xmlFree (buf);
5157             return;
5158         }
5159 
5160 	q = r;
5161 	ql = rl;
5162 	r = cur;
5163 	rl = l;
5164 
5165 	NEXTL(l);
5166 	cur = CUR_CHAR(l);
5167 
5168     }
5169     buf[len] = 0;
5170     if (cur == 0) {
5171 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172 	                     "Comment not terminated \n<!--%.50s\n", buf);
5173     } else if (!IS_CHAR(cur)) {
5174         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175                           "xmlParseComment: invalid xmlChar value %d\n",
5176 	                  cur);
5177     } else {
5178         NEXT;
5179 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180 	    (!ctxt->disableSAX))
5181 	    ctxt->sax->comment(ctxt->userData, buf);
5182     }
5183     xmlFree(buf);
5184     return;
5185 not_terminated:
5186     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187 			 "Comment not terminated\n", NULL);
5188     xmlFree(buf);
5189     return;
5190 }
5191 
5192 /**
5193  * xmlParseComment:
5194  * @ctxt:  an XML parser context
5195  *
5196  * DEPRECATED: Internal function, don't use.
5197  *
5198  * Parse an XML (SGML) comment. Always consumes '<!'.
5199  *
5200  *  The spec says that "For compatibility, the string "--" (double-hyphen)
5201  *  must not occur within comments. "
5202  *
5203  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5204  */
5205 void
xmlParseComment(xmlParserCtxtPtr ctxt)5206 xmlParseComment(xmlParserCtxtPtr ctxt) {
5207     xmlChar *buf = NULL;
5208     size_t size = XML_PARSER_BUFFER_SIZE;
5209     size_t len = 0;
5210     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211                        XML_MAX_HUGE_LENGTH :
5212                        XML_MAX_TEXT_LENGTH;
5213     const xmlChar *in;
5214     size_t nbchar = 0;
5215     int ccol;
5216 
5217     /*
5218      * Check that there is a comment right here.
5219      */
5220     if ((RAW != '<') || (NXT(1) != '!'))
5221         return;
5222     SKIP(2);
5223     if ((RAW != '-') || (NXT(1) != '-'))
5224         return;
5225     SKIP(2);
5226     GROW;
5227 
5228     /*
5229      * Accelerated common case where input don't need to be
5230      * modified before passing it to the handler.
5231      */
5232     in = ctxt->input->cur;
5233     do {
5234 	if (*in == 0xA) {
5235 	    do {
5236 		ctxt->input->line++; ctxt->input->col = 1;
5237 		in++;
5238 	    } while (*in == 0xA);
5239 	}
5240 get_more:
5241         ccol = ctxt->input->col;
5242 	while (((*in > '-') && (*in <= 0x7F)) ||
5243 	       ((*in >= 0x20) && (*in < '-')) ||
5244 	       (*in == 0x09)) {
5245 		    in++;
5246 		    ccol++;
5247 	}
5248 	ctxt->input->col = ccol;
5249 	if (*in == 0xA) {
5250 	    do {
5251 		ctxt->input->line++; ctxt->input->col = 1;
5252 		in++;
5253 	    } while (*in == 0xA);
5254 	    goto get_more;
5255 	}
5256 	nbchar = in - ctxt->input->cur;
5257 	/*
5258 	 * save current set of data
5259 	 */
5260 	if (nbchar > 0) {
5261             if (buf == NULL) {
5262                 if ((*in == '-') && (in[1] == '-'))
5263                     size = nbchar + 1;
5264                 else
5265                     size = XML_PARSER_BUFFER_SIZE + nbchar;
5266                 buf = (xmlChar *) xmlMallocAtomic(size);
5267                 if (buf == NULL) {
5268                     xmlErrMemory(ctxt);
5269                     return;
5270                 }
5271                 len = 0;
5272             } else if (len + nbchar + 1 >= size) {
5273                 xmlChar *new_buf;
5274                 size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275                 new_buf = (xmlChar *) xmlRealloc(buf, size);
5276                 if (new_buf == NULL) {
5277                     xmlFree (buf);
5278                     xmlErrMemory(ctxt);
5279                     return;
5280                 }
5281                 buf = new_buf;
5282             }
5283             memcpy(&buf[len], ctxt->input->cur, nbchar);
5284             len += nbchar;
5285             buf[len] = 0;
5286 	}
5287         if (len > maxLength) {
5288             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289                          "Comment too big found", NULL);
5290             xmlFree (buf);
5291             return;
5292         }
5293 	ctxt->input->cur = in;
5294 	if (*in == 0xA) {
5295 	    in++;
5296 	    ctxt->input->line++; ctxt->input->col = 1;
5297 	}
5298 	if (*in == 0xD) {
5299 	    in++;
5300 	    if (*in == 0xA) {
5301 		ctxt->input->cur = in;
5302 		in++;
5303 		ctxt->input->line++; ctxt->input->col = 1;
5304 		goto get_more;
5305 	    }
5306 	    in--;
5307 	}
5308 	SHRINK;
5309 	GROW;
5310 	in = ctxt->input->cur;
5311 	if (*in == '-') {
5312 	    if (in[1] == '-') {
5313 	        if (in[2] == '>') {
5314 		    SKIP(3);
5315 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316 		        (!ctxt->disableSAX)) {
5317 			if (buf != NULL)
5318 			    ctxt->sax->comment(ctxt->userData, buf);
5319 			else
5320 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321 		    }
5322 		    if (buf != NULL)
5323 		        xmlFree(buf);
5324 		    return;
5325 		}
5326 		if (buf != NULL) {
5327 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328 		                      "Double hyphen within comment: "
5329                                       "<!--%.50s\n",
5330 				      buf);
5331 		} else
5332 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333 		                      "Double hyphen within comment\n", NULL);
5334 		in++;
5335 		ctxt->input->col++;
5336 	    }
5337 	    in++;
5338 	    ctxt->input->col++;
5339 	    goto get_more;
5340 	}
5341     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5342     xmlParseCommentComplex(ctxt, buf, len, size);
5343     return;
5344 }
5345 
5346 
5347 /**
5348  * xmlParsePITarget:
5349  * @ctxt:  an XML parser context
5350  *
5351  * DEPRECATED: Internal function, don't use.
5352  *
5353  * parse the name of a PI
5354  *
5355  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356  *
5357  * Returns the PITarget name or NULL
5358  */
5359 
5360 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5361 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5362     const xmlChar *name;
5363 
5364     name = xmlParseName(ctxt);
5365     if ((name != NULL) &&
5366         ((name[0] == 'x') || (name[0] == 'X')) &&
5367         ((name[1] == 'm') || (name[1] == 'M')) &&
5368         ((name[2] == 'l') || (name[2] == 'L'))) {
5369 	int i;
5370 	if ((name[0] == 'x') && (name[1] == 'm') &&
5371 	    (name[2] == 'l') && (name[3] == 0)) {
5372 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373 		 "XML declaration allowed only at the start of the document\n");
5374 	    return(name);
5375 	} else if (name[3] == 0) {
5376 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377 	    return(name);
5378 	}
5379 	for (i = 0;;i++) {
5380 	    if (xmlW3CPIs[i] == NULL) break;
5381 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382 	        return(name);
5383 	}
5384 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5386 		      NULL, NULL);
5387     }
5388     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391     }
5392     return(name);
5393 }
5394 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: optional blanks, the word "catalog",
 * optional blanks, '=', optional blanks, a quoted URL, optional
 * blanks. Deviations emit the XML_WAR_CATALOG_PI warning, except for
 * a missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        /*
         * NOTE(review): unlike the other syntax problems this one does
         * not go through the error label, so no warning is emitted.
         * Confirm whether that is intentional before changing it.
         */
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* The URL is delimited by a matching pair of ' or " quotes. */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        /*
         * Unfortunately, the catalog API doesn't report OOM errors.
         * xmlGetLastError isn't very helpful since we don't know
         * where the last error came from. We'd have to reset it
         * before this call and restore it afterwards.
         */
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5462 
5463 /**
5464  * xmlParsePI:
5465  * @ctxt:  an XML parser context
5466  *
5467  * DEPRECATED: Internal function, don't use.
5468  *
5469  * parse an XML Processing Instruction.
5470  *
5471  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5472  *
5473  * The processing is transferred to SAX once parsed.
5474  */
5475 
5476 void
xmlParsePI(xmlParserCtxtPtr ctxt)5477 xmlParsePI(xmlParserCtxtPtr ctxt) {
5478     xmlChar *buf = NULL;
5479     size_t len = 0;
5480     size_t size = XML_PARSER_BUFFER_SIZE;
5481     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5482                        XML_MAX_HUGE_LENGTH :
5483                        XML_MAX_TEXT_LENGTH;
5484     int cur, l;
5485     const xmlChar *target;
5486 
5487     if ((RAW == '<') && (NXT(1) == '?')) {
5488 	/*
5489 	 * this is a Processing Instruction.
5490 	 */
5491 	SKIP(2);
5492 
5493 	/*
5494 	 * Parse the target name and check for special support like
5495 	 * namespace.
5496 	 */
5497         target = xmlParsePITarget(ctxt);
5498 	if (target != NULL) {
5499 	    if ((RAW == '?') && (NXT(1) == '>')) {
5500 		SKIP(2);
5501 
5502 		/*
5503 		 * SAX: PI detected.
5504 		 */
5505 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5506 		    (ctxt->sax->processingInstruction != NULL))
5507 		    ctxt->sax->processingInstruction(ctxt->userData,
5508 		                                     target, NULL);
5509 		return;
5510 	    }
5511 	    buf = (xmlChar *) xmlMallocAtomic(size);
5512 	    if (buf == NULL) {
5513 		xmlErrMemory(ctxt);
5514 		return;
5515 	    }
5516 	    if (SKIP_BLANKS == 0) {
5517 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5518 			  "ParsePI: PI %s space expected\n", target);
5519 	    }
5520 	    cur = CUR_CHAR(l);
5521 	    while (IS_CHAR(cur) && /* checked */
5522 		   ((cur != '?') || (NXT(1) != '>'))) {
5523 		if (len + 5 >= size) {
5524 		    xmlChar *tmp;
5525                     size_t new_size = size * 2;
5526 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5527 		    if (tmp == NULL) {
5528 			xmlErrMemory(ctxt);
5529 			xmlFree(buf);
5530 			return;
5531 		    }
5532 		    buf = tmp;
5533                     size = new_size;
5534 		}
5535 		COPY_BUF(buf, len, cur);
5536                 if (len > maxLength) {
5537                     xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5538                                       "PI %s too big found", target);
5539                     xmlFree(buf);
5540                     return;
5541                 }
5542 		NEXTL(l);
5543 		cur = CUR_CHAR(l);
5544 	    }
5545 	    buf[len] = 0;
5546 	    if (cur != '?') {
5547 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5548 		      "ParsePI: PI %s never end ...\n", target);
5549 	    } else {
5550 		SKIP(2);
5551 
5552 #ifdef LIBXML_CATALOG_ENABLED
5553 		if ((ctxt->inSubset == 0) &&
5554 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5555 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5556 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5557 			(allow == XML_CATA_ALLOW_ALL))
5558 			xmlParseCatalogPI(ctxt, buf);
5559 		}
5560 #endif
5561 
5562 
5563 		/*
5564 		 * SAX: PI detected.
5565 		 */
5566 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5567 		    (ctxt->sax->processingInstruction != NULL))
5568 		    ctxt->sax->processingInstruction(ctxt->userData,
5569 		                                     target, buf);
5570 	    }
5571 	    xmlFree(buf);
5572 	} else {
5573 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5574 	}
5575     }
5576 }
5577 
5578 /**
5579  * xmlParseNotationDecl:
5580  * @ctxt:  an XML parser context
5581  *
5582  * DEPRECATED: Internal function, don't use.
5583  *
5584  * Parse a notation declaration. Always consumes '<!'.
5585  *
5586  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5587  *
5588  * Hence there is actually 3 choices:
5589  *     'PUBLIC' S PubidLiteral
5590  *     'PUBLIC' S PubidLiteral S SystemLiteral
5591  * and 'SYSTEM' S SystemLiteral
5592  *
5593  * See the NOTE on xmlParseExternalID().
5594  */
5595 
5596 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5597 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5598     const xmlChar *name;
5599     xmlChar *Pubid;
5600     xmlChar *Systemid;
5601 
5602     if ((CUR != '<') || (NXT(1) != '!'))
5603         return;
5604     SKIP(2);
5605 
5606     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5607 	int inputid = ctxt->input->id;
5608 	SKIP(8);
5609 	if (SKIP_BLANKS_PE == 0) {
5610 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5611 			   "Space required after '<!NOTATION'\n");
5612 	    return;
5613 	}
5614 
5615         name = xmlParseName(ctxt);
5616 	if (name == NULL) {
5617 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5618 	    return;
5619 	}
5620 	if (xmlStrchr(name, ':') != NULL) {
5621 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5622 		     "colons are forbidden from notation names '%s'\n",
5623 		     name, NULL, NULL);
5624 	}
5625 	if (SKIP_BLANKS_PE == 0) {
5626 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5627 		     "Space required after the NOTATION name'\n");
5628 	    return;
5629 	}
5630 
5631 	/*
5632 	 * Parse the IDs.
5633 	 */
5634 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5635 	SKIP_BLANKS_PE;
5636 
5637 	if (RAW == '>') {
5638 	    if (inputid != ctxt->input->id) {
5639 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5640 	                       "Notation declaration doesn't start and stop"
5641                                " in the same entity\n");
5642 	    }
5643 	    NEXT;
5644 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5645 		(ctxt->sax->notationDecl != NULL))
5646 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5647 	} else {
5648 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5649 	}
5650 	if (Systemid != NULL) xmlFree(Systemid);
5651 	if (Pubid != NULL) xmlFree(Pubid);
5652     }
5653 }
5654 
5655 /**
5656  * xmlParseEntityDecl:
5657  * @ctxt:  an XML parser context
5658  *
5659  * DEPRECATED: Internal function, don't use.
5660  *
5661  * Parse an entity declaration. Always consumes '<!'.
5662  *
5663  * [70] EntityDecl ::= GEDecl | PEDecl
5664  *
5665  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5666  *
5667  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5668  *
5669  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5670  *
5671  * [74] PEDef ::= EntityValue | ExternalID
5672  *
5673  * [76] NDataDecl ::= S 'NDATA' S Name
5674  *
5675  * [ VC: Notation Declared ]
5676  * The Name must match the declared name of a notation.
5677  */
5678 
5679 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5680 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5681     const xmlChar *name = NULL;
5682     xmlChar *value = NULL;
5683     xmlChar *URI = NULL, *literal = NULL;
5684     const xmlChar *ndata = NULL;
5685     int isParameter = 0;
5686     xmlChar *orig = NULL;
5687 
5688     if ((CUR != '<') || (NXT(1) != '!'))
5689         return;
5690     SKIP(2);
5691 
5692     /* GROW; done in the caller */
5693     if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5694 	int inputid = ctxt->input->id;
5695 	SKIP(6);
5696 	if (SKIP_BLANKS_PE == 0) {
5697 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5698 			   "Space required after '<!ENTITY'\n");
5699 	}
5700 
5701 	if (RAW == '%') {
5702 	    NEXT;
5703 	    if (SKIP_BLANKS_PE == 0) {
5704 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5705 			       "Space required after '%%'\n");
5706 	    }
5707 	    isParameter = 1;
5708 	}
5709 
5710         name = xmlParseName(ctxt);
5711 	if (name == NULL) {
5712 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5713 	                   "xmlParseEntityDecl: no name\n");
5714             return;
5715 	}
5716 	if (xmlStrchr(name, ':') != NULL) {
5717 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5718 		     "colons are forbidden from entities names '%s'\n",
5719 		     name, NULL, NULL);
5720 	}
5721 	if (SKIP_BLANKS_PE == 0) {
5722 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5723 			   "Space required after the entity name\n");
5724 	}
5725 
5726 	/*
5727 	 * handle the various case of definitions...
5728 	 */
5729 	if (isParameter) {
5730 	    if ((RAW == '"') || (RAW == '\'')) {
5731 	        value = xmlParseEntityValue(ctxt, &orig);
5732 		if (value) {
5733 		    if ((ctxt->sax != NULL) &&
5734 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5735 			ctxt->sax->entityDecl(ctxt->userData, name,
5736 		                    XML_INTERNAL_PARAMETER_ENTITY,
5737 				    NULL, NULL, value);
5738 		}
5739 	    } else {
5740 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5741 		if ((URI == NULL) && (literal == NULL)) {
5742 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5743 		}
5744 		if (URI) {
5745 		    xmlURIPtr uri;
5746 
5747                     if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5748                         xmlErrMemory(ctxt);
5749                     } else if (uri == NULL) {
5750                         /*
5751                          * This really ought to be a well formedness error
5752                          * but the XML Core WG decided otherwise c.f. issue
5753                          * E26 of the XML erratas.
5754                          */
5755                         xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5756                                      "Invalid URI: %s\n", URI);
5757                     } else if (uri->fragment != NULL) {
5758                         /*
5759                          * Okay this is foolish to block those but not
5760                          * invalid URIs.
5761                          */
5762                         xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5763                     } else {
5764                         if ((ctxt->sax != NULL) &&
5765                             (!ctxt->disableSAX) &&
5766                             (ctxt->sax->entityDecl != NULL))
5767                             ctxt->sax->entityDecl(ctxt->userData, name,
5768                                         XML_EXTERNAL_PARAMETER_ENTITY,
5769                                         literal, URI, NULL);
5770                     }
5771 		    xmlFreeURI(uri);
5772 		}
5773 	    }
5774 	} else {
5775 	    if ((RAW == '"') || (RAW == '\'')) {
5776 	        value = xmlParseEntityValue(ctxt, &orig);
5777 		if ((ctxt->sax != NULL) &&
5778 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5779 		    ctxt->sax->entityDecl(ctxt->userData, name,
5780 				XML_INTERNAL_GENERAL_ENTITY,
5781 				NULL, NULL, value);
5782 		/*
5783 		 * For expat compatibility in SAX mode.
5784 		 */
5785 		if ((ctxt->myDoc == NULL) ||
5786 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5787 		    if (ctxt->myDoc == NULL) {
5788 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5789 			if (ctxt->myDoc == NULL) {
5790 			    xmlErrMemory(ctxt);
5791 			    goto done;
5792 			}
5793 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5794 		    }
5795 		    if (ctxt->myDoc->intSubset == NULL) {
5796 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5797 					    BAD_CAST "fake", NULL, NULL);
5798                         if (ctxt->myDoc->intSubset == NULL) {
5799                             xmlErrMemory(ctxt);
5800                             goto done;
5801                         }
5802                     }
5803 
5804 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5805 			              NULL, NULL, value);
5806 		}
5807 	    } else {
5808 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5809 		if ((URI == NULL) && (literal == NULL)) {
5810 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5811 		}
5812 		if (URI) {
5813 		    xmlURIPtr uri;
5814 
5815                     if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5816                         xmlErrMemory(ctxt);
5817                     } else if (uri == NULL) {
5818                         /*
5819                          * This really ought to be a well formedness error
5820                          * but the XML Core WG decided otherwise c.f. issue
5821                          * E26 of the XML erratas.
5822                          */
5823                         xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5824                                      "Invalid URI: %s\n", URI);
5825                     } else if (uri->fragment != NULL) {
5826                         /*
5827                          * Okay this is foolish to block those but not
5828                          * invalid URIs.
5829                          */
5830                         xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5831                     }
5832                     xmlFreeURI(uri);
5833 		}
5834 		if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5835 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5836 				   "Space required before 'NDATA'\n");
5837 		}
5838 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5839 		    SKIP(5);
5840 		    if (SKIP_BLANKS_PE == 0) {
5841 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5842 				       "Space required after 'NDATA'\n");
5843 		    }
5844 		    ndata = xmlParseName(ctxt);
5845 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5846 		        (ctxt->sax->unparsedEntityDecl != NULL))
5847 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5848 				    literal, URI, ndata);
5849 		} else {
5850 		    if ((ctxt->sax != NULL) &&
5851 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5852 			ctxt->sax->entityDecl(ctxt->userData, name,
5853 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5854 				    literal, URI, NULL);
5855 		    /*
5856 		     * For expat compatibility in SAX mode.
5857 		     * assuming the entity replacement was asked for
5858 		     */
5859 		    if ((ctxt->replaceEntities != 0) &&
5860 			((ctxt->myDoc == NULL) ||
5861 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5862 			if (ctxt->myDoc == NULL) {
5863 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5864 			    if (ctxt->myDoc == NULL) {
5865 			        xmlErrMemory(ctxt);
5866 				goto done;
5867 			    }
5868 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5869 			}
5870 
5871 			if (ctxt->myDoc->intSubset == NULL) {
5872 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5873 						BAD_CAST "fake", NULL, NULL);
5874                             if (ctxt->myDoc->intSubset == NULL) {
5875                                 xmlErrMemory(ctxt);
5876                                 goto done;
5877                             }
5878                         }
5879 			xmlSAX2EntityDecl(ctxt, name,
5880 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5881 				          literal, URI, NULL);
5882 		    }
5883 		}
5884 	    }
5885 	}
5886 	SKIP_BLANKS_PE;
5887 	if (RAW != '>') {
5888 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5889 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5890 	    xmlHaltParser(ctxt);
5891 	} else {
5892 	    if (inputid != ctxt->input->id) {
5893 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5894 	                       "Entity declaration doesn't start and stop in"
5895                                " the same entity\n");
5896 	    }
5897 	    NEXT;
5898 	}
5899 	if (orig != NULL) {
5900 	    /*
5901 	     * Ugly mechanism to save the raw entity value.
5902 	     */
5903 	    xmlEntityPtr cur = NULL;
5904 
5905 	    if (isParameter) {
5906 	        if ((ctxt->sax != NULL) &&
5907 		    (ctxt->sax->getParameterEntity != NULL))
5908 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5909 	    } else {
5910 	        if ((ctxt->sax != NULL) &&
5911 		    (ctxt->sax->getEntity != NULL))
5912 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5913 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5914 		    cur = xmlSAX2GetEntity(ctxt, name);
5915 		}
5916 	    }
5917             if ((cur != NULL) && (cur->orig == NULL)) {
5918 		cur->orig = orig;
5919                 orig = NULL;
5920 	    }
5921 	}
5922 
5923 done:
5924 	if (value != NULL) xmlFree(value);
5925 	if (URI != NULL) xmlFree(URI);
5926 	if (literal != NULL) xmlFree(literal);
5927         if (orig != NULL) xmlFree(orig);
5928     }
5929 }
5930 
5931 /**
5932  * xmlParseDefaultDecl:
5933  * @ctxt:  an XML parser context
5934  * @value:  Receive a possible fixed default value for the attribute
5935  *
5936  * DEPRECATED: Internal function, don't use.
5937  *
5938  * Parse an attribute default declaration
5939  *
5940  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5941  *
5942  * [ VC: Required Attribute ]
5943  * if the default declaration is the keyword #REQUIRED, then the
5944  * attribute must be specified for all elements of the type in the
5945  * attribute-list declaration.
5946  *
5947  * [ VC: Attribute Default Legal ]
5948  * The declared default value must meet the lexical constraints of
5949  * the declared attribute type c.f. xmlValidateAttributeDecl()
5950  *
5951  * [ VC: Fixed Attribute Default ]
5952  * if an attribute has a default value declared with the #FIXED
5953  * keyword, instances of that attribute must match the default value.
5954  *
5955  * [ WFC: No < in Attribute Values ]
5956  * handled in xmlParseAttValue()
5957  *
5958  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5959  *          or XML_ATTRIBUTE_FIXED.
5960  */
5961 
5962 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5963 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5964     int val;
5965     xmlChar *ret;
5966 
5967     *value = NULL;
5968     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5969 	SKIP(9);
5970 	return(XML_ATTRIBUTE_REQUIRED);
5971     }
5972     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5973 	SKIP(8);
5974 	return(XML_ATTRIBUTE_IMPLIED);
5975     }
5976     val = XML_ATTRIBUTE_NONE;
5977     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5978 	SKIP(6);
5979 	val = XML_ATTRIBUTE_FIXED;
5980 	if (SKIP_BLANKS_PE == 0) {
5981 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982 			   "Space required after '#FIXED'\n");
5983 	}
5984     }
5985     ret = xmlParseAttValue(ctxt);
5986     if (ret == NULL) {
5987 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5988 		       "Attribute default value declaration error\n");
5989     } else
5990         *value = ret;
5991     return(val);
5992 }
5993 
5994 /**
5995  * xmlParseNotationType:
5996  * @ctxt:  an XML parser context
5997  *
5998  * DEPRECATED: Internal function, don't use.
5999  *
6000  * parse an Notation attribute type.
6001  *
6002  * Note: the leading 'NOTATION' S part has already being parsed...
6003  *
6004  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6005  *
6006  * [ VC: Notation Attributes ]
6007  * Values of this type must match one of the notation names included
6008  * in the declaration; all notation names in the declaration must be declared.
6009  *
6010  * Returns: the notation attribute tree built while parsing
6011  */
6012 
6013 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)6014 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
6015     const xmlChar *name;
6016     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6017 
6018     if (RAW != '(') {
6019 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6020 	return(NULL);
6021     }
6022     do {
6023         NEXT;
6024 	SKIP_BLANKS_PE;
6025         name = xmlParseName(ctxt);
6026 	if (name == NULL) {
6027 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6028 			   "Name expected in NOTATION declaration\n");
6029             xmlFreeEnumeration(ret);
6030 	    return(NULL);
6031 	}
6032 	tmp = ret;
6033 	while (tmp != NULL) {
6034 	    if (xmlStrEqual(name, tmp->name)) {
6035 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6036 	  "standalone: attribute notation value token %s duplicated\n",
6037 				 name, NULL);
6038 		if (!xmlDictOwns(ctxt->dict, name))
6039 		    xmlFree((xmlChar *) name);
6040 		break;
6041 	    }
6042 	    tmp = tmp->next;
6043 	}
6044 	if (tmp == NULL) {
6045 	    cur = xmlCreateEnumeration(name);
6046 	    if (cur == NULL) {
6047                 xmlErrMemory(ctxt);
6048                 xmlFreeEnumeration(ret);
6049                 return(NULL);
6050             }
6051 	    if (last == NULL) ret = last = cur;
6052 	    else {
6053 		last->next = cur;
6054 		last = cur;
6055 	    }
6056 	}
6057 	SKIP_BLANKS_PE;
6058     } while (RAW == '|');
6059     if (RAW != ')') {
6060 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6061         xmlFreeEnumeration(ret);
6062 	return(NULL);
6063     }
6064     NEXT;
6065     return(ret);
6066 }
6067 
6068 /**
6069  * xmlParseEnumerationType:
6070  * @ctxt:  an XML parser context
6071  *
6072  * DEPRECATED: Internal function, don't use.
6073  *
6074  * parse an Enumeration attribute type.
6075  *
6076  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6077  *
6078  * [ VC: Enumeration ]
6079  * Values of this type must match one of the Nmtoken tokens in
6080  * the declaration
6081  *
6082  * Returns: the enumeration attribute tree built while parsing
6083  */
6084 
6085 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)6086 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6087     xmlChar *name;
6088     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6089 
6090     if (RAW != '(') {
6091 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6092 	return(NULL);
6093     }
6094     do {
6095         NEXT;
6096 	SKIP_BLANKS_PE;
6097         name = xmlParseNmtoken(ctxt);
6098 	if (name == NULL) {
6099 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6100 	    return(ret);
6101 	}
6102 	tmp = ret;
6103 	while (tmp != NULL) {
6104 	    if (xmlStrEqual(name, tmp->name)) {
6105 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6106 	  "standalone: attribute enumeration value token %s duplicated\n",
6107 				 name, NULL);
6108 		if (!xmlDictOwns(ctxt->dict, name))
6109 		    xmlFree(name);
6110 		break;
6111 	    }
6112 	    tmp = tmp->next;
6113 	}
6114 	if (tmp == NULL) {
6115 	    cur = xmlCreateEnumeration(name);
6116 	    if (!xmlDictOwns(ctxt->dict, name))
6117 		xmlFree(name);
6118 	    if (cur == NULL) {
6119                 xmlErrMemory(ctxt);
6120                 xmlFreeEnumeration(ret);
6121                 return(NULL);
6122             }
6123 	    if (last == NULL) ret = last = cur;
6124 	    else {
6125 		last->next = cur;
6126 		last = cur;
6127 	    }
6128 	}
6129 	SKIP_BLANKS_PE;
6130     } while (RAW == '|');
6131     if (RAW != ')') {
6132 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6133 	return(ret);
6134     }
6135     NEXT;
6136     return(ret);
6137 }
6138 
6139 /**
6140  * xmlParseEnumeratedType:
6141  * @ctxt:  an XML parser context
6142  * @tree:  the enumeration tree built while parsing
6143  *
6144  * DEPRECATED: Internal function, don't use.
6145  *
6146  * parse an Enumerated attribute type.
6147  *
6148  * [57] EnumeratedType ::= NotationType | Enumeration
6149  *
6150  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6151  *
6152  *
6153  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6154  */
6155 
6156 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6157 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6158     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6159 	SKIP(8);
6160 	if (SKIP_BLANKS_PE == 0) {
6161 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6162 			   "Space required after 'NOTATION'\n");
6163 	    return(0);
6164 	}
6165 	*tree = xmlParseNotationType(ctxt);
6166 	if (*tree == NULL) return(0);
6167 	return(XML_ATTRIBUTE_NOTATION);
6168     }
6169     *tree = xmlParseEnumerationType(ctxt);
6170     if (*tree == NULL) return(0);
6171     return(XML_ATTRIBUTE_ENUMERATION);
6172 }
6173 
6174 /**
6175  * xmlParseAttributeType:
6176  * @ctxt:  an XML parser context
6177  * @tree:  the enumeration tree built while parsing
6178  *
6179  * DEPRECATED: Internal function, don't use.
6180  *
6181  * parse the Attribute list def for an element
6182  *
6183  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6184  *
6185  * [55] StringType ::= 'CDATA'
6186  *
6187  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6188  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6189  *
6190  * Validity constraints for attribute values syntax are checked in
6191  * xmlValidateAttributeValue()
6192  *
6193  * [ VC: ID ]
6194  * Values of type ID must match the Name production. A name must not
6195  * appear more than once in an XML document as a value of this type;
6196  * i.e., ID values must uniquely identify the elements which bear them.
6197  *
6198  * [ VC: One ID per Element Type ]
6199  * No element type may have more than one ID attribute specified.
6200  *
6201  * [ VC: ID Attribute Default ]
6202  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6203  *
6204  * [ VC: IDREF ]
6205  * Values of type IDREF must match the Name production, and values
6206  * of type IDREFS must match Names; each IDREF Name must match the value
6207  * of an ID attribute on some element in the XML document; i.e. IDREF
6208  * values must match the value of some ID attribute.
6209  *
6210  * [ VC: Entity Name ]
6211  * Values of type ENTITY must match the Name production, values
6212  * of type ENTITIES must match Names; each Entity Name must match the
6213  * name of an unparsed entity declared in the DTD.
6214  *
6215  * [ VC: Name Token ]
6216  * Values of type NMTOKEN must match the Nmtoken production; values
6217  * of type NMTOKENS must match Nmtokens.
6218  *
6219  * Returns the attribute type
6220  */
6221 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6222 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6223     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6224 	SKIP(5);
6225 	return(XML_ATTRIBUTE_CDATA);
6226      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6227 	SKIP(6);
6228 	return(XML_ATTRIBUTE_IDREFS);
6229      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6230 	SKIP(5);
6231 	return(XML_ATTRIBUTE_IDREF);
6232      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6233         SKIP(2);
6234 	return(XML_ATTRIBUTE_ID);
6235      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6236 	SKIP(6);
6237 	return(XML_ATTRIBUTE_ENTITY);
6238      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6239 	SKIP(8);
6240 	return(XML_ATTRIBUTE_ENTITIES);
6241      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6242 	SKIP(8);
6243 	return(XML_ATTRIBUTE_NMTOKENS);
6244      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6245 	SKIP(7);
6246 	return(XML_ATTRIBUTE_NMTOKEN);
6247      }
6248      return(xmlParseEnumeratedType(ctxt, tree));
6249 }
6250 
6251 /**
6252  * xmlParseAttributeListDecl:
6253  * @ctxt:  an XML parser context
6254  *
6255  * DEPRECATED: Internal function, don't use.
6256  *
6257  * Parse an attribute list declaration for an element. Always consumes '<!'.
6258  *
6259  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6260  *
6261  * [53] AttDef ::= S Name S AttType S DefaultDecl
6262  *
6263  */
6264 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6265 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6266     const xmlChar *elemName;
6267     const xmlChar *attrName;
6268     xmlEnumerationPtr tree;
6269 
6270     if ((CUR != '<') || (NXT(1) != '!'))
6271         return;
6272     SKIP(2);
6273 
6274     if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6275 	int inputid = ctxt->input->id;
6276 
6277 	SKIP(7);
6278 	if (SKIP_BLANKS_PE == 0) {
6279 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6280 		                 "Space required after '<!ATTLIST'\n");
6281 	}
6282         elemName = xmlParseName(ctxt);
6283 	if (elemName == NULL) {
6284 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285 			   "ATTLIST: no name for Element\n");
6286 	    return;
6287 	}
6288 	SKIP_BLANKS_PE;
6289 	GROW;
6290 	while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6291 	    int type;
6292 	    int def;
6293 	    xmlChar *defaultValue = NULL;
6294 
6295 	    GROW;
6296             tree = NULL;
6297 	    attrName = xmlParseName(ctxt);
6298 	    if (attrName == NULL) {
6299 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6300 			       "ATTLIST: no name for Attribute\n");
6301 		break;
6302 	    }
6303 	    GROW;
6304 	    if (SKIP_BLANKS_PE == 0) {
6305 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6306 		        "Space required after the attribute name\n");
6307 		break;
6308 	    }
6309 
6310 	    type = xmlParseAttributeType(ctxt, &tree);
6311 	    if (type <= 0) {
6312 	        break;
6313 	    }
6314 
6315 	    GROW;
6316 	    if (SKIP_BLANKS_PE == 0) {
6317 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6318 			       "Space required after the attribute type\n");
6319 	        if (tree != NULL)
6320 		    xmlFreeEnumeration(tree);
6321 		break;
6322 	    }
6323 
6324 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6325 	    if (def <= 0) {
6326                 if (defaultValue != NULL)
6327 		    xmlFree(defaultValue);
6328 	        if (tree != NULL)
6329 		    xmlFreeEnumeration(tree);
6330 	        break;
6331 	    }
6332 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6333 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6334 
6335 	    GROW;
6336             if (RAW != '>') {
6337 		if (SKIP_BLANKS_PE == 0) {
6338 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6339 			"Space required after the attribute default value\n");
6340 		    if (defaultValue != NULL)
6341 			xmlFree(defaultValue);
6342 		    if (tree != NULL)
6343 			xmlFreeEnumeration(tree);
6344 		    break;
6345 		}
6346 	    }
6347 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6348 		(ctxt->sax->attributeDecl != NULL))
6349 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6350 	                        type, def, defaultValue, tree);
6351 	    else if (tree != NULL)
6352 		xmlFreeEnumeration(tree);
6353 
6354 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6355 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6356 		(def != XML_ATTRIBUTE_REQUIRED)) {
6357 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6358 	    }
6359 	    if (ctxt->sax2) {
6360 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6361 	    }
6362 	    if (defaultValue != NULL)
6363 	        xmlFree(defaultValue);
6364 	    GROW;
6365 	}
6366 	if (RAW == '>') {
6367 	    if (inputid != ctxt->input->id) {
6368 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6369                                "Attribute list declaration doesn't start and"
6370                                " stop in the same entity\n");
6371 	    }
6372 	    NEXT;
6373 	}
6374     }
6375 }
6376 
6377 /**
6378  * xmlParseElementMixedContentDecl:
6379  * @ctxt:  an XML parser context
6380  * @inputchk:  the input used for the current entity, needed for boundary checks
6381  *
6382  * DEPRECATED: Internal function, don't use.
6383  *
6384  * parse the declaration for a Mixed Element content
6385  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6386  *
6387  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6388  *                '(' S? '#PCDATA' S? ')'
6389  *
6390  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6391  *
6392  * [ VC: No Duplicate Types ]
6393  * The same name must not appear more than once in a single
6394  * mixed-content declaration.
6395  *
6396  * returns: the list of the xmlElementContentPtr describing the element choices
6397  */
6398 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6399 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6400     xmlElementContentPtr ret = NULL, cur = NULL, n;
6401     const xmlChar *elem = NULL;
6402 
6403     GROW;
6404     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6405 	SKIP(7);
6406 	SKIP_BLANKS_PE;
6407 	if (RAW == ')') {
6408 	    if (ctxt->input->id != inputchk) {
6409 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6410                                "Element content declaration doesn't start and"
6411                                " stop in the same entity\n");
6412 	    }
6413 	    NEXT;
6414 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6415 	    if (ret == NULL)
6416                 goto mem_error;
6417 	    if (RAW == '*') {
6418 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6419 		NEXT;
6420 	    }
6421 	    return(ret);
6422 	}
6423 	if ((RAW == '(') || (RAW == '|')) {
6424 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6425 	    if (ret == NULL)
6426                 goto mem_error;
6427 	}
6428 	while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6429 	    NEXT;
6430             n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6431             if (n == NULL)
6432                 goto mem_error;
6433 	    if (elem == NULL) {
6434 		n->c1 = cur;
6435 		if (cur != NULL)
6436 		    cur->parent = n;
6437 		ret = cur = n;
6438 	    } else {
6439 	        cur->c2 = n;
6440 		n->parent = cur;
6441 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6442                 if (n->c1 == NULL)
6443                     goto mem_error;
6444 		n->c1->parent = n;
6445 		cur = n;
6446 	    }
6447 	    SKIP_BLANKS_PE;
6448 	    elem = xmlParseName(ctxt);
6449 	    if (elem == NULL) {
6450 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6451 			"xmlParseElementMixedContentDecl : Name expected\n");
6452 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6453 		return(NULL);
6454 	    }
6455 	    SKIP_BLANKS_PE;
6456 	    GROW;
6457 	}
6458 	if ((RAW == ')') && (NXT(1) == '*')) {
6459 	    if (elem != NULL) {
6460 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6461 		                               XML_ELEMENT_CONTENT_ELEMENT);
6462 		if (cur->c2 == NULL)
6463                     goto mem_error;
6464 		cur->c2->parent = cur;
6465             }
6466             if (ret != NULL)
6467                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6468 	    if (ctxt->input->id != inputchk) {
6469 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6470                                "Element content declaration doesn't start and"
6471                                " stop in the same entity\n");
6472 	    }
6473 	    SKIP(2);
6474 	} else {
6475 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6476 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6477 	    return(NULL);
6478 	}
6479 
6480     } else {
6481 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6482     }
6483     return(ret);
6484 
6485 mem_error:
6486     xmlErrMemory(ctxt);
6487     xmlFreeDocElementContent(ctxt->myDoc, ret);
6488     return(NULL);
6489 }
6490 
6491 /**
6492  * xmlParseElementChildrenContentDeclPriv:
6493  * @ctxt:  an XML parser context
6494  * @inputchk:  the input used for the current entity, needed for boundary checks
6495  * @depth: the level of recursion
6496  *
6497  * parse the declaration for an element-only (children) content model
6498  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6499  *
6500  *
6501  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6502  *
6503  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6504  *
6505  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6506  *
6507  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6508  *
6509  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6510  * TODO Parameter-entity replacement text must be properly nested
6511  *	with parenthesized groups. That is to say, if either of the
6512  *	opening or closing parentheses in a choice, seq, or Mixed
6513  *	construct is contained in the replacement text for a parameter
6514  *	entity, both must be contained in the same replacement text. For
6515  *	interoperability, if a parameter-entity reference appears in a
6516  *	choice, seq, or Mixed construct, its replacement text should not
6517  *	be empty, and neither the first nor last non-blank character of
6518  *	the replacement text should be a connector (| or ,).
6519  *
6520  * Returns the tree of xmlElementContentPtr describing the element
6521  *          hierarchy.
6522  */
6523 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6524 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6525                                        int depth) {
6526     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6527     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6528     const xmlChar *elem;
6529     xmlChar type = 0;
6530 
6531     if (depth > maxDepth) {
6532         xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6533                 "xmlParseElementChildrenContentDecl : depth %d too deep, "
6534                 "use XML_PARSE_HUGE\n", depth);
6535 	return(NULL);
6536     }
6537     SKIP_BLANKS_PE;
6538     GROW;
6539     if (RAW == '(') {
6540 	int inputid = ctxt->input->id;
6541 
6542         /* Recurse on first child */
6543 	NEXT;
6544 	SKIP_BLANKS_PE;
6545         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6546                                                            depth + 1);
6547         if (cur == NULL)
6548             return(NULL);
6549 	SKIP_BLANKS_PE;
6550 	GROW;
6551     } else {
6552 	elem = xmlParseName(ctxt);
6553 	if (elem == NULL) {
6554 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6555 	    return(NULL);
6556 	}
6557         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6558 	if (cur == NULL) {
6559 	    xmlErrMemory(ctxt);
6560 	    return(NULL);
6561 	}
6562 	GROW;
6563 	if (RAW == '?') {
6564 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6565 	    NEXT;
6566 	} else if (RAW == '*') {
6567 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6568 	    NEXT;
6569 	} else if (RAW == '+') {
6570 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6571 	    NEXT;
6572 	} else {
6573 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6574 	}
6575 	GROW;
6576     }
6577     SKIP_BLANKS_PE;
6578     while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6579         /*
6580 	 * Each loop we parse one separator and one element.
6581 	 */
6582         if (RAW == ',') {
6583 	    if (type == 0) type = CUR;
6584 
6585 	    /*
6586 	     * Detect "Name | Name , Name" error
6587 	     */
6588 	    else if (type != CUR) {
6589 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6590 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6591 		                  type);
6592 		if ((last != NULL) && (last != ret))
6593 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6594 		if (ret != NULL)
6595 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6596 		return(NULL);
6597 	    }
6598 	    NEXT;
6599 
6600 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6601 	    if (op == NULL) {
6602                 xmlErrMemory(ctxt);
6603 		if ((last != NULL) && (last != ret))
6604 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6605 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6606 		return(NULL);
6607 	    }
6608 	    if (last == NULL) {
6609 		op->c1 = ret;
6610 		if (ret != NULL)
6611 		    ret->parent = op;
6612 		ret = cur = op;
6613 	    } else {
6614 	        cur->c2 = op;
6615 		if (op != NULL)
6616 		    op->parent = cur;
6617 		op->c1 = last;
6618 		if (last != NULL)
6619 		    last->parent = op;
6620 		cur =op;
6621 		last = NULL;
6622 	    }
6623 	} else if (RAW == '|') {
6624 	    if (type == 0) type = CUR;
6625 
6626 	    /*
6627 	     * Detect "Name , Name | Name" error
6628 	     */
6629 	    else if (type != CUR) {
6630 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6631 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6632 				  type);
6633 		if ((last != NULL) && (last != ret))
6634 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6635 		if (ret != NULL)
6636 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6637 		return(NULL);
6638 	    }
6639 	    NEXT;
6640 
6641 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6642 	    if (op == NULL) {
6643                 xmlErrMemory(ctxt);
6644 		if ((last != NULL) && (last != ret))
6645 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6646 		if (ret != NULL)
6647 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6648 		return(NULL);
6649 	    }
6650 	    if (last == NULL) {
6651 		op->c1 = ret;
6652 		if (ret != NULL)
6653 		    ret->parent = op;
6654 		ret = cur = op;
6655 	    } else {
6656 	        cur->c2 = op;
6657 		if (op != NULL)
6658 		    op->parent = cur;
6659 		op->c1 = last;
6660 		if (last != NULL)
6661 		    last->parent = op;
6662 		cur =op;
6663 		last = NULL;
6664 	    }
6665 	} else {
6666 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6667 	    if ((last != NULL) && (last != ret))
6668 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6669 	    if (ret != NULL)
6670 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6671 	    return(NULL);
6672 	}
6673 	GROW;
6674 	SKIP_BLANKS_PE;
6675 	GROW;
6676 	if (RAW == '(') {
6677 	    int inputid = ctxt->input->id;
6678 	    /* Recurse on second child */
6679 	    NEXT;
6680 	    SKIP_BLANKS_PE;
6681 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6682                                                           depth + 1);
6683             if (last == NULL) {
6684 		if (ret != NULL)
6685 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6686 		return(NULL);
6687             }
6688 	    SKIP_BLANKS_PE;
6689 	} else {
6690 	    elem = xmlParseName(ctxt);
6691 	    if (elem == NULL) {
6692 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6693 		if (ret != NULL)
6694 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6695 		return(NULL);
6696 	    }
6697 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6698 	    if (last == NULL) {
6699                 xmlErrMemory(ctxt);
6700 		if (ret != NULL)
6701 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6702 		return(NULL);
6703 	    }
6704 	    if (RAW == '?') {
6705 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6706 		NEXT;
6707 	    } else if (RAW == '*') {
6708 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6709 		NEXT;
6710 	    } else if (RAW == '+') {
6711 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6712 		NEXT;
6713 	    } else {
6714 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6715 	    }
6716 	}
6717 	SKIP_BLANKS_PE;
6718 	GROW;
6719     }
6720     if ((cur != NULL) && (last != NULL)) {
6721         cur->c2 = last;
6722 	if (last != NULL)
6723 	    last->parent = cur;
6724     }
6725     if (ctxt->input->id != inputchk) {
6726 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6727                        "Element content declaration doesn't start and stop in"
6728                        " the same entity\n");
6729     }
6730     NEXT;
6731     if (RAW == '?') {
6732 	if (ret != NULL) {
6733 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6734 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6735 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6736 	    else
6737 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6738 	}
6739 	NEXT;
6740     } else if (RAW == '*') {
6741 	if (ret != NULL) {
6742 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6743 	    cur = ret;
6744 	    /*
6745 	     * Some normalization:
6746 	     * (a | b* | c?)* == (a | b | c)*
6747 	     */
6748 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6749 		if ((cur->c1 != NULL) &&
6750 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6752 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6753 		if ((cur->c2 != NULL) &&
6754 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6755 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6756 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6757 		cur = cur->c2;
6758 	    }
6759 	}
6760 	NEXT;
6761     } else if (RAW == '+') {
6762 	if (ret != NULL) {
6763 	    int found = 0;
6764 
6765 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6766 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6767 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6768 	    else
6769 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6770 	    /*
6771 	     * Some normalization:
6772 	     * (a | b*)+ == (a | b)*
6773 	     * (a | b?)+ == (a | b)*
6774 	     */
6775 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6776 		if ((cur->c1 != NULL) &&
6777 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6778 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6779 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6780 		    found = 1;
6781 		}
6782 		if ((cur->c2 != NULL) &&
6783 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6784 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6785 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6786 		    found = 1;
6787 		}
6788 		cur = cur->c2;
6789 	    }
6790 	    if (found)
6791 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6792 	}
6793 	NEXT;
6794     }
6795     return(ret);
6796 }
6797 
6798 /**
6799  * xmlParseElementChildrenContentDecl:
6800  * @ctxt:  an XML parser context
6801  * @inputchk:  the input used for the current entity, needed for boundary checks
6802  *
6803  * DEPRECATED: Internal function, don't use.
6804  *
6805  * parse the declaration for a Mixed Element content
6806  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6807  *
6808  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6809  *
6810  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6811  *
6812  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6813  *
6814  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6815  *
6816  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6817  * TODO Parameter-entity replacement text must be properly nested
6818  *	with parenthesized groups. That is to say, if either of the
6819  *	opening or closing parentheses in a choice, seq, or Mixed
6820  *	construct is contained in the replacement text for a parameter
6821  *	entity, both must be contained in the same replacement text. For
6822  *	interoperability, if a parameter-entity reference appears in a
6823  *	choice, seq, or Mixed construct, its replacement text should not
6824  *	be empty, and neither the first nor last non-blank character of
6825  *	the replacement text should be a connector (| or ,).
6826  *
6827  * Returns the tree of xmlElementContentPtr describing the element
6828  *          hierarchy.
6829  */
6830 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6831 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6832     /* stub left for API/ABI compat */
6833     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6834 }
6835 
6836 /**
6837  * xmlParseElementContentDecl:
6838  * @ctxt:  an XML parser context
6839  * @name:  the name of the element being defined.
6840  * @result:  the Element Content pointer will be stored here if any
6841  *
6842  * DEPRECATED: Internal function, don't use.
6843  *
6844  * parse the declaration for an Element content either Mixed or Children,
6845  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6846  *
6847  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6848  *
6849  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6850  */
6851 
6852 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6853 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6854                            xmlElementContentPtr *result) {
6855 
6856     xmlElementContentPtr tree = NULL;
6857     int inputid = ctxt->input->id;
6858     int res;
6859 
6860     *result = NULL;
6861 
6862     if (RAW != '(') {
6863 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6864 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6865 	return(-1);
6866     }
6867     NEXT;
6868     GROW;
6869     SKIP_BLANKS_PE;
6870     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6871         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6872 	res = XML_ELEMENT_TYPE_MIXED;
6873     } else {
6874         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6875 	res = XML_ELEMENT_TYPE_ELEMENT;
6876     }
6877     SKIP_BLANKS_PE;
6878     *result = tree;
6879     return(res);
6880 }
6881 
6882 /**
6883  * xmlParseElementDecl:
6884  * @ctxt:  an XML parser context
6885  *
6886  * DEPRECATED: Internal function, don't use.
6887  *
6888  * Parse an element declaration. Always consumes '<!'.
6889  *
6890  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6891  *
6892  * [ VC: Unique Element Type Declaration ]
6893  * No element type may be declared more than once
6894  *
6895  * Returns the type of the element, or -1 in case of error
6896  */
6897 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6898 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6899     const xmlChar *name;
6900     int ret = -1;
6901     xmlElementContentPtr content  = NULL;
6902 
6903     if ((CUR != '<') || (NXT(1) != '!'))
6904         return(ret);
6905     SKIP(2);
6906 
6907     /* GROW; done in the caller */
6908     if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6909 	int inputid = ctxt->input->id;
6910 
6911 	SKIP(7);
6912 	if (SKIP_BLANKS_PE == 0) {
6913 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6914 		           "Space required after 'ELEMENT'\n");
6915 	    return(-1);
6916 	}
6917         name = xmlParseName(ctxt);
6918 	if (name == NULL) {
6919 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6920 			   "xmlParseElementDecl: no name for Element\n");
6921 	    return(-1);
6922 	}
6923 	if (SKIP_BLANKS_PE == 0) {
6924 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6925 			   "Space required after the element name\n");
6926 	}
6927 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6928 	    SKIP(5);
6929 	    /*
6930 	     * Element must always be empty.
6931 	     */
6932 	    ret = XML_ELEMENT_TYPE_EMPTY;
6933 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6934 	           (NXT(2) == 'Y')) {
6935 	    SKIP(3);
6936 	    /*
6937 	     * Element is a generic container.
6938 	     */
6939 	    ret = XML_ELEMENT_TYPE_ANY;
6940 	} else if (RAW == '(') {
6941 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6942 	} else {
6943 	    /*
6944 	     * [ WFC: PEs in Internal Subset ] error handling.
6945 	     */
6946             xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6947                   "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6948 	    return(-1);
6949 	}
6950 
6951 	SKIP_BLANKS_PE;
6952 
6953 	if (RAW != '>') {
6954 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6955 	    if (content != NULL) {
6956 		xmlFreeDocElementContent(ctxt->myDoc, content);
6957 	    }
6958 	} else {
6959 	    if (inputid != ctxt->input->id) {
6960 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6961                                "Element declaration doesn't start and stop in"
6962                                " the same entity\n");
6963 	    }
6964 
6965 	    NEXT;
6966 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6967 		(ctxt->sax->elementDecl != NULL)) {
6968 		if (content != NULL)
6969 		    content->parent = NULL;
6970 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6971 		                       content);
6972 		if ((content != NULL) && (content->parent == NULL)) {
6973 		    /*
6974 		     * this is a trick: if xmlAddElementDecl is called,
6975 		     * instead of copying the full tree it is plugged directly
6976 		     * if called from the parser. Avoid duplicating the
6977 		     * interfaces or change the API/ABI
6978 		     */
6979 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6980 		}
6981 	    } else if (content != NULL) {
6982 		xmlFreeDocElementContent(ctxt->myDoc, content);
6983 	    }
6984 	}
6985     }
6986     return(ret);
6987 }
6988 
6989 /**
6990  * xmlParseConditionalSections
6991  * @ctxt:  an XML parser context
6992  *
6993  * Parse a conditional section. Always consumes '<!['.
6994  *
6995  * [61] conditionalSect ::= includeSect | ignoreSect
6996  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6997  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6998  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6999  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
7000  */
7001 
7002 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)7003 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
7004     int *inputIds = NULL;
7005     size_t inputIdsSize = 0;
7006     size_t depth = 0;
7007 
7008     while (PARSER_STOPPED(ctxt) == 0) {
7009         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7010             int id = ctxt->input->id;
7011 
7012             SKIP(3);
7013             SKIP_BLANKS_PE;
7014 
7015             if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7016                 SKIP(7);
7017                 SKIP_BLANKS_PE;
7018                 if (RAW != '[') {
7019                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7020                     xmlHaltParser(ctxt);
7021                     goto error;
7022                 }
7023                 if (ctxt->input->id != id) {
7024                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7025                                    "All markup of the conditional section is"
7026                                    " not in the same entity\n");
7027                 }
7028                 NEXT;
7029 
7030                 if (inputIdsSize <= depth) {
7031                     int *tmp;
7032 
7033                     inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7034                     tmp = (int *) xmlRealloc(inputIds,
7035                             inputIdsSize * sizeof(int));
7036                     if (tmp == NULL) {
7037                         xmlErrMemory(ctxt);
7038                         goto error;
7039                     }
7040                     inputIds = tmp;
7041                 }
7042                 inputIds[depth] = id;
7043                 depth++;
7044             } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7045                 size_t ignoreDepth = 0;
7046 
7047                 SKIP(6);
7048                 SKIP_BLANKS_PE;
7049                 if (RAW != '[') {
7050                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7051                     xmlHaltParser(ctxt);
7052                     goto error;
7053                 }
7054                 if (ctxt->input->id != id) {
7055                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7056                                    "All markup of the conditional section is"
7057                                    " not in the same entity\n");
7058                 }
7059                 NEXT;
7060 
7061                 while (PARSER_STOPPED(ctxt) == 0) {
7062                     if (RAW == 0) {
7063                         xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7064                         goto error;
7065                     }
7066                     if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7067                         SKIP(3);
7068                         ignoreDepth++;
7069                         /* Check for integer overflow */
7070                         if (ignoreDepth == 0) {
7071                             xmlErrMemory(ctxt);
7072                             goto error;
7073                         }
7074                     } else if ((RAW == ']') && (NXT(1) == ']') &&
7075                                (NXT(2) == '>')) {
7076                         SKIP(3);
7077                         if (ignoreDepth == 0)
7078                             break;
7079                         ignoreDepth--;
7080                     } else {
7081                         NEXT;
7082                     }
7083                 }
7084 
7085                 if (ctxt->input->id != id) {
7086                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7087                                    "All markup of the conditional section is"
7088                                    " not in the same entity\n");
7089                 }
7090             } else {
7091                 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7092                 xmlHaltParser(ctxt);
7093                 goto error;
7094             }
7095         } else if ((depth > 0) &&
7096                    (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7097             depth--;
7098             if (ctxt->input->id != inputIds[depth]) {
7099                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7100                                "All markup of the conditional section is not"
7101                                " in the same entity\n");
7102             }
7103             SKIP(3);
7104         } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7105             xmlParseMarkupDecl(ctxt);
7106         } else {
7107             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7108             xmlHaltParser(ctxt);
7109             goto error;
7110         }
7111 
7112         if (depth == 0)
7113             break;
7114 
7115         SKIP_BLANKS_PE;
7116         SHRINK;
7117         GROW;
7118     }
7119 
7120 error:
7121     xmlFree(inputIds);
7122 }
7123 
7124 /**
7125  * xmlParseMarkupDecl:
7126  * @ctxt:  an XML parser context
7127  *
7128  * DEPRECATED: Internal function, don't use.
7129  *
7130  * Parse markup declarations. Always consumes '<!' or '<?'.
7131  *
7132  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7133  *                     NotationDecl | PI | Comment
7134  *
7135  * [ VC: Proper Declaration/PE Nesting ]
7136  * Parameter-entity replacement text must be properly nested with
7137  * markup declarations. That is to say, if either the first character
7138  * or the last character of a markup declaration (markupdecl above) is
7139  * contained in the replacement text for a parameter-entity reference,
7140  * both must be contained in the same replacement text.
7141  *
7142  * [ WFC: PEs in Internal Subset ]
7143  * In the internal DTD subset, parameter-entity references can occur
7144  * only where markup declarations can occur, not within markup declarations.
7145  * (This does not apply to references that occur in external parameter
7146  * entities or to the external subset.)
7147  */
7148 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)7149 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7150     GROW;
7151     if (CUR == '<') {
7152         if (NXT(1) == '!') {
7153 	    switch (NXT(2)) {
7154 	        case 'E':
7155 		    if (NXT(3) == 'L')
7156 			xmlParseElementDecl(ctxt);
7157 		    else if (NXT(3) == 'N')
7158 			xmlParseEntityDecl(ctxt);
7159                     else
7160                         SKIP(2);
7161 		    break;
7162 	        case 'A':
7163 		    xmlParseAttributeListDecl(ctxt);
7164 		    break;
7165 	        case 'N':
7166 		    xmlParseNotationDecl(ctxt);
7167 		    break;
7168 	        case '-':
7169 		    xmlParseComment(ctxt);
7170 		    break;
7171 		default:
7172 		    /* there is an error but it will be detected later */
7173                     SKIP(2);
7174 		    break;
7175 	    }
7176 	} else if (NXT(1) == '?') {
7177 	    xmlParsePI(ctxt);
7178 	}
7179     }
7180 }
7181 
7182 /**
7183  * xmlParseTextDecl:
7184  * @ctxt:  an XML parser context
7185  *
7186  * DEPRECATED: Internal function, don't use.
7187  *
7188  * parse an XML declaration header for external entities
7189  *
7190  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7191  */
7192 
7193 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7194 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7195     xmlChar *version;
7196 
7197     /*
7198      * We know that '<?xml' is here.
7199      */
7200     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7201 	SKIP(5);
7202     } else {
7203 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7204 	return;
7205     }
7206 
7207     if (SKIP_BLANKS == 0) {
7208 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7209 		       "Space needed after '<?xml'\n");
7210     }
7211 
7212     /*
7213      * We may have the VersionInfo here.
7214      */
7215     version = xmlParseVersionInfo(ctxt);
7216     if (version == NULL) {
7217 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
7218         if (version == NULL) {
7219             xmlErrMemory(ctxt);
7220             return;
7221         }
7222     } else {
7223 	if (SKIP_BLANKS == 0) {
7224 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7225 		           "Space needed here\n");
7226 	}
7227     }
7228     ctxt->input->version = version;
7229 
7230     /*
7231      * We must have the encoding declaration
7232      */
7233     xmlParseEncodingDecl(ctxt);
7234 
7235     SKIP_BLANKS;
7236     if ((RAW == '?') && (NXT(1) == '>')) {
7237         SKIP(2);
7238     } else if (RAW == '>') {
7239         /* Deprecated old WD ... */
7240 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7241 	NEXT;
7242     } else {
7243         int c;
7244 
7245 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7246         while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7247             NEXT;
7248             if (c == '>')
7249                 break;
7250         }
7251     }
7252 }
7253 
7254 /**
7255  * xmlParseExternalSubset:
7256  * @ctxt:  an XML parser context
7257  * @ExternalID: the external identifier
7258  * @SystemID: the system identifier (or URL)
7259  *
7260  * parse Markup declarations from an external subset
7261  *
7262  * [30] extSubset ::= textDecl? extSubsetDecl
7263  *
7264  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7265  */
7266 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7267 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7268                        const xmlChar *SystemID) {
7269     int oldInputNr;
7270 
7271     xmlCtxtInitializeLate(ctxt);
7272 
7273     xmlDetectEncoding(ctxt);
7274 
7275     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7276 	xmlParseTextDecl(ctxt);
7277     }
7278     if (ctxt->myDoc == NULL) {
7279         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7280 	if (ctxt->myDoc == NULL) {
7281 	    xmlErrMemory(ctxt);
7282 	    return;
7283 	}
7284 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7285     }
7286     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7287         (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7288         xmlErrMemory(ctxt);
7289     }
7290 
7291     ctxt->inSubset = 2;
7292     oldInputNr = ctxt->inputNr;
7293 
7294     SKIP_BLANKS_PE;
7295     while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7296            (!PARSER_STOPPED(ctxt))) {
7297 	GROW;
7298         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7299             xmlParseConditionalSections(ctxt);
7300         } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7301             xmlParseMarkupDecl(ctxt);
7302         } else {
7303             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7304             xmlHaltParser(ctxt);
7305             return;
7306         }
7307         SKIP_BLANKS_PE;
7308         SHRINK;
7309     }
7310 
7311     while (ctxt->inputNr > oldInputNr)
7312         xmlPopPE(ctxt);
7313 
7314     if (RAW != 0) {
7315 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7316     }
7317 }
7318 
7319 /**
7320  * xmlParseReference:
7321  * @ctxt:  an XML parser context
7322  *
7323  * DEPRECATED: Internal function, don't use.
7324  *
7325  * parse and handle entity references in content, depending on the SAX
7326  * interface, this may end-up in a call to character() if this is a
7327  * CharRef, a predefined entity, if there is no reference() callback.
7328  * or if the parser was asked to switch to that mode.
7329  *
7330  * Always consumes '&'.
7331  *
7332  * [67] Reference ::= EntityRef | CharRef
7333  */
7334 void
xmlParseReference(xmlParserCtxtPtr ctxt)7335 xmlParseReference(xmlParserCtxtPtr ctxt) {
7336     xmlEntityPtr ent = NULL;
7337     const xmlChar *name;
7338     xmlChar *val;
7339 
7340     if (RAW != '&')
7341         return;
7342 
7343     /*
7344      * Simple case of a CharRef
7345      */
7346     if (NXT(1) == '#') {
7347 	int i = 0;
7348 	xmlChar out[16];
7349 	int value = xmlParseCharRef(ctxt);
7350 
7351 	if (value == 0)
7352 	    return;
7353 
7354         /*
7355          * Just encode the value in UTF-8
7356          */
7357         COPY_BUF(out, i, value);
7358         out[i] = 0;
7359         if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7360             (!ctxt->disableSAX))
7361             ctxt->sax->characters(ctxt->userData, out, i);
7362 	return;
7363     }
7364 
7365     /*
7366      * We are seeing an entity reference
7367      */
7368     name = xmlParseEntityRefInternal(ctxt);
7369     if (name != NULL)
7370         ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7371     if (ent == NULL) return;
7372     if (!ctxt->wellFormed)
7373 	return;
7374 
7375     /* special case of predefined entities */
7376     if ((ent->name == NULL) ||
7377         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7378 	val = ent->content;
7379 	if (val == NULL) return;
7380 	/*
7381 	 * inline the entity.
7382 	 */
7383 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7384 	    (!ctxt->disableSAX))
7385 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7386 	return;
7387     }
7388 
7389     /*
7390      * The first reference to the entity trigger a parsing phase
7391      * where the ent->children is filled with the result from
7392      * the parsing.
7393      * Note: external parsed entities will not be loaded, it is not
7394      * required for a non-validating parser, unless the parsing option
7395      * of validating, or substituting entities were given. Doing so is
7396      * far more secure as the parser will only process data coming from
7397      * the document entity by default.
7398      *
7399      * FIXME: This doesn't work correctly since entities can be
7400      * expanded with different namespace declarations in scope.
7401      * For example:
7402      *
7403      * <!DOCTYPE doc [
7404      *   <!ENTITY ent "<ns:elem/>">
7405      * ]>
7406      * <doc>
7407      *   <decl1 xmlns:ns="urn:ns1">
7408      *     &ent;
7409      *   </decl1>
7410      *   <decl2 xmlns:ns="urn:ns2">
7411      *     &ent;
7412      *   </decl2>
7413      * </doc>
7414      *
7415      * Proposed fix:
7416      *
7417      * - Ignore current namespace declarations when parsing the
7418      *   entity. If a prefix can't be resolved, don't report an error
7419      *   but mark it as unresolved.
7420      * - Try to resolve these prefixes when expanding the entity.
7421      *   This will require a specialized version of xmlStaticCopyNode
7422      *   which can also make use of the namespace hash table to avoid
7423      *   quadratic behavior.
7424      *
7425      * Alternatively, we could simply reparse the entity on each
7426      * expansion like we already do with custom SAX callbacks.
7427      * External entity content should be cached in this case.
7428      */
7429     if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7430         (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7431          ((ctxt->replaceEntities) ||
7432           (ctxt->validate)))) {
7433         if ((ent->flags & XML_ENT_PARSED) == 0) {
7434             xmlCtxtParseEntity(ctxt, ent);
7435         } else if (ent->children == NULL) {
7436             /*
7437              * Probably running in SAX mode and the callbacks don't
7438              * build the entity content. Parse the entity again.
7439              *
7440              * This will also be triggered in normal tree builder mode
7441              * if an entity happens to be empty, causing unnecessary
7442              * reloads. It's hard to come up with a reliable check in
7443              * which mode we're running.
7444              */
7445             xmlCtxtParseEntity(ctxt, ent);
7446         }
7447     }
7448 
7449     /*
7450      * We also check for amplification if entities aren't substituted.
7451      * They might be expanded later.
7452      */
7453     if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7454         return;
7455 
7456     if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7457         return;
7458 
7459     if (ctxt->replaceEntities == 0) {
7460 	/*
7461 	 * Create a reference
7462 	 */
7463         if (ctxt->sax->reference != NULL)
7464 	    ctxt->sax->reference(ctxt->userData, ent->name);
7465     } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7466         xmlNodePtr copy, cur;
7467 
7468         /*
7469          * Seems we are generating the DOM content, copy the tree
7470 	 */
7471         cur = ent->children;
7472 
7473         /*
7474          * Handle first text node with SAX to coalesce text efficiently
7475          */
7476         if ((cur->type == XML_TEXT_NODE) ||
7477             (cur->type == XML_CDATA_SECTION_NODE)) {
7478             int len = xmlStrlen(cur->content);
7479 
7480             if ((cur->type == XML_TEXT_NODE) ||
7481                 (ctxt->sax->cdataBlock == NULL)) {
7482                 if (ctxt->sax->characters != NULL)
7483                     ctxt->sax->characters(ctxt, cur->content, len);
7484             } else {
7485                 if (ctxt->sax->cdataBlock != NULL)
7486                     ctxt->sax->cdataBlock(ctxt, cur->content, len);
7487             }
7488 
7489             cur = cur->next;
7490         }
7491 
7492         while (cur != NULL) {
7493             xmlNodePtr last;
7494 
7495             /*
7496              * Handle last text node with SAX to coalesce text efficiently
7497              */
7498             if ((cur->next == NULL) &&
7499                 ((cur->type == XML_TEXT_NODE) ||
7500                  (cur->type == XML_CDATA_SECTION_NODE))) {
7501                 int len = xmlStrlen(cur->content);
7502 
7503                 if ((cur->type == XML_TEXT_NODE) ||
7504                     (ctxt->sax->cdataBlock == NULL)) {
7505                     if (ctxt->sax->characters != NULL)
7506                         ctxt->sax->characters(ctxt, cur->content, len);
7507                 } else {
7508                     if (ctxt->sax->cdataBlock != NULL)
7509                         ctxt->sax->cdataBlock(ctxt, cur->content, len);
7510                 }
7511 
7512                 break;
7513             }
7514 
7515             /*
7516              * Reset coalesce buffer stats only for non-text nodes.
7517              */
7518             ctxt->nodemem = 0;
7519             ctxt->nodelen = 0;
7520 
7521             copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7522 
7523             if (copy == NULL) {
7524                 xmlErrMemory(ctxt);
7525                 break;
7526             }
7527 
7528             if (ctxt->parseMode == XML_PARSE_READER) {
7529                 /* Needed for reader */
7530                 copy->extra = cur->extra;
7531                 /* Maybe needed for reader */
7532                 copy->_private = cur->_private;
7533             }
7534 
7535             copy->parent = ctxt->node;
7536             last = ctxt->node->last;
7537             if (last == NULL) {
7538                 ctxt->node->children = copy;
7539             } else {
7540                 last->next = copy;
7541                 copy->prev = last;
7542             }
7543             ctxt->node->last = copy;
7544 
7545             cur = cur->next;
7546         }
7547     }
7548 }
7549 
7550 static xmlEntityPtr
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt,const xmlChar * name,int inAttr)7551 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7552     xmlEntityPtr ent;
7553 
7554     /*
7555      * Predefined entities override any extra definition
7556      */
7557     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7558         ent = xmlGetPredefinedEntity(name);
7559         if (ent != NULL)
7560             return(ent);
7561     }
7562 
7563     /*
7564      * Ask first SAX for entity resolution, otherwise try the
7565      * entities which may have stored in the parser context.
7566      */
7567     if (ctxt->sax != NULL) {
7568 	if (ctxt->sax->getEntity != NULL)
7569 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7570 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7571 	    (ctxt->options & XML_PARSE_OLDSAX))
7572 	    ent = xmlGetPredefinedEntity(name);
7573 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7574 	    (ctxt->userData==ctxt)) {
7575 	    ent = xmlSAX2GetEntity(ctxt, name);
7576 	}
7577     }
7578     /*
7579      * [ WFC: Entity Declared ]
7580      * In a document without any DTD, a document with only an
7581      * internal DTD subset which contains no parameter entity
7582      * references, or a document with "standalone='yes'", the
7583      * Name given in the entity reference must match that in an
7584      * entity declaration, except that well-formed documents
7585      * need not declare any of the following entities: amp, lt,
7586      * gt, apos, quot.
7587      * The declaration of a parameter entity must precede any
7588      * reference to it.
7589      * Similarly, the declaration of a general entity must
7590      * precede any reference to it which appears in a default
7591      * value in an attribute-list declaration. Note that if
7592      * entities are declared in the external subset or in
7593      * external parameter entities, a non-validating processor
7594      * is not obligated to read and process their declarations;
7595      * for such documents, the rule that an entity must be
7596      * declared is a well-formedness constraint only if
7597      * standalone='yes'.
7598      */
7599     if (ent == NULL) {
7600 	if ((ctxt->standalone == 1) ||
7601 	    ((ctxt->hasExternalSubset == 0) &&
7602 	     (ctxt->hasPErefs == 0))) {
7603 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604 		     "Entity '%s' not defined\n", name);
7605 	} else {
7606 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7607 		     "Entity '%s' not defined\n", name);
7608 	    if ((ctxt->inSubset == 0) &&
7609 		(ctxt->sax != NULL) &&
7610                 (ctxt->disableSAX == 0) &&
7611 		(ctxt->sax->reference != NULL)) {
7612 		ctxt->sax->reference(ctxt->userData, name);
7613 	    }
7614 	}
7615 	ctxt->valid = 0;
7616     }
7617 
7618     /*
7619      * [ WFC: Parsed Entity ]
7620      * An entity reference must not contain the name of an
7621      * unparsed entity
7622      */
7623     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7624 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7625 		 "Entity reference to unparsed entity %s\n", name);
7626         ent = NULL;
7627     }
7628 
7629     /*
7630      * [ WFC: No External Entity References ]
7631      * Attribute values cannot contain direct or indirect
7632      * entity references to external entities.
7633      */
7634     else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7635         if (inAttr) {
7636             xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7637                  "Attribute references external entity '%s'\n", name);
7638             ent = NULL;
7639         }
7640     }
7641 
7642     return(ent);
7643 }
7644 
7645 /**
7646  * xmlParseEntityRefInternal:
7647  * @ctxt:  an XML parser context
7648  * @inAttr:  whether we are in an attribute value
7649  *
7650  * Parse an entity reference. Always consumes '&'.
7651  *
7652  * [68] EntityRef ::= '&' Name ';'
7653  *
7654  * Returns the name, or NULL in case of error.
7655  */
7656 static const xmlChar *
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt)7657 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7658     const xmlChar *name;
7659 
7660     GROW;
7661 
7662     if (RAW != '&')
7663         return(NULL);
7664     NEXT;
7665     name = xmlParseName(ctxt);
7666     if (name == NULL) {
7667 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7668 		       "xmlParseEntityRef: no name\n");
7669         return(NULL);
7670     }
7671     if (RAW != ';') {
7672 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7673 	return(NULL);
7674     }
7675     NEXT;
7676 
7677     return(name);
7678 }
7679 
7680 /**
7681  * xmlParseEntityRef:
7682  * @ctxt:  an XML parser context
7683  *
7684  * DEPRECATED: Internal function, don't use.
7685  *
7686  * Returns the xmlEntityPtr if found, or NULL otherwise.
7687  */
7688 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7689 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7690     const xmlChar *name;
7691 
7692     if (ctxt == NULL)
7693         return(NULL);
7694 
7695     name = xmlParseEntityRefInternal(ctxt);
7696     if (name == NULL)
7697         return(NULL);
7698 
7699     return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7700 }
7701 
7702 /**
7703  * xmlParseStringEntityRef:
7704  * @ctxt:  an XML parser context
7705  * @str:  a pointer to an index in the string
7706  *
7707  * parse ENTITY references declarations, but this version parses it from
7708  * a string value.
7709  *
7710  * [68] EntityRef ::= '&' Name ';'
7711  *
7712  * [ WFC: Entity Declared ]
7713  * In a document without any DTD, a document with only an internal DTD
7714  * subset which contains no parameter entity references, or a document
7715  * with "standalone='yes'", the Name given in the entity reference
7716  * must match that in an entity declaration, except that well-formed
7717  * documents need not declare any of the following entities: amp, lt,
7718  * gt, apos, quot.  The declaration of a parameter entity must precede
7719  * any reference to it.  Similarly, the declaration of a general entity
7720  * must precede any reference to it which appears in a default value in an
7721  * attribute-list declaration. Note that if entities are declared in the
7722  * external subset or in external parameter entities, a non-validating
7723  * processor is not obligated to read and process their declarations;
7724  * for such documents, the rule that an entity must be declared is a
7725  * well-formedness constraint only if standalone='yes'.
7726  *
7727  * [ WFC: Parsed Entity ]
7728  * An entity reference must not contain the name of an unparsed entity
7729  *
7730  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7731  * is updated to the current location in the string.
7732  */
7733 static xmlChar *
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7734 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7735     xmlChar *name;
7736     const xmlChar *ptr;
7737     xmlChar cur;
7738 
7739     if ((str == NULL) || (*str == NULL))
7740         return(NULL);
7741     ptr = *str;
7742     cur = *ptr;
7743     if (cur != '&')
7744 	return(NULL);
7745 
7746     ptr++;
7747     name = xmlParseStringName(ctxt, &ptr);
7748     if (name == NULL) {
7749 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7750 		       "xmlParseStringEntityRef: no name\n");
7751 	*str = ptr;
7752 	return(NULL);
7753     }
7754     if (*ptr != ';') {
7755 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7756         xmlFree(name);
7757 	*str = ptr;
7758 	return(NULL);
7759     }
7760     ptr++;
7761 
7762     *str = ptr;
7763     return(name);
7764 }
7765 
7766 /**
7767  * xmlParsePEReference:
7768  * @ctxt:  an XML parser context
7769  *
7770  * DEPRECATED: Internal function, don't use.
7771  *
7772  * Parse a parameter entity reference. Always consumes '%'.
7773  *
7774  * The entity content is handled directly by pushing it's content as
7775  * a new input stream.
7776  *
7777  * [69] PEReference ::= '%' Name ';'
7778  *
7779  * [ WFC: No Recursion ]
7780  * A parsed entity must not contain a recursive
7781  * reference to itself, either directly or indirectly.
7782  *
7783  * [ WFC: Entity Declared ]
7784  * In a document without any DTD, a document with only an internal DTD
7785  * subset which contains no parameter entity references, or a document
7786  * with "standalone='yes'", ...  ... The declaration of a parameter
7787  * entity must precede any reference to it...
7788  *
7789  * [ VC: Entity Declared ]
7790  * In a document with an external subset or external parameter entities
7791  * with "standalone='no'", ...  ... The declaration of a parameter entity
7792  * must precede any reference to it...
7793  *
7794  * [ WFC: In DTD ]
7795  * Parameter-entity references may only appear in the DTD.
7796  * NOTE: misleading but this is handled.
7797  */
7798 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7799 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7800 {
7801     const xmlChar *name;
7802     xmlEntityPtr entity = NULL;
7803     xmlParserInputPtr input;
7804 
7805     if (RAW != '%')
7806         return;
7807     NEXT;
7808     name = xmlParseName(ctxt);
7809     if (name == NULL) {
7810 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7811 	return;
7812     }
7813     if (RAW != ';') {
7814 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7815         return;
7816     }
7817 
7818     NEXT;
7819 
7820     /*
7821      * Request the entity from SAX
7822      */
7823     if ((ctxt->sax != NULL) &&
7824 	(ctxt->sax->getParameterEntity != NULL))
7825 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7826     if (entity == NULL) {
7827 	/*
7828 	 * [ WFC: Entity Declared ]
7829 	 * In a document without any DTD, a document with only an
7830 	 * internal DTD subset which contains no parameter entity
7831 	 * references, or a document with "standalone='yes'", ...
7832 	 * ... The declaration of a parameter entity must precede
7833 	 * any reference to it...
7834 	 */
7835 	if ((ctxt->standalone == 1) ||
7836 	    ((ctxt->hasExternalSubset == 0) &&
7837 	     (ctxt->hasPErefs == 0))) {
7838 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7839 			      "PEReference: %%%s; not found\n",
7840 			      name);
7841 	} else {
7842 	    /*
7843 	     * [ VC: Entity Declared ]
7844 	     * In a document with an external subset or external
7845 	     * parameter entities with "standalone='no'", ...
7846 	     * ... The declaration of a parameter entity must
7847 	     * precede any reference to it...
7848 	     */
7849             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7850                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7851                                  "PEReference: %%%s; not found\n",
7852                                  name, NULL);
7853             } else
7854                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7855                               "PEReference: %%%s; not found\n",
7856                               name, NULL);
7857             ctxt->valid = 0;
7858 	}
7859     } else {
7860 	/*
7861 	 * Internal checking in case the entity quest barfed
7862 	 */
7863 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7864 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7865 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7866 		  "Internal: %%%s; is not a parameter entity\n",
7867 			  name, NULL);
7868 	} else {
7869 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7870                 ((ctxt->options & XML_PARSE_NO_XXE) ||
7871 		 ((ctxt->loadsubset == 0) &&
7872 		  (ctxt->replaceEntities == 0) &&
7873 		  (ctxt->validate == 0))))
7874 		return;
7875 
7876             if (entity->flags & XML_ENT_EXPANDING) {
7877                 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7878                 xmlHaltParser(ctxt);
7879                 return;
7880             }
7881 
7882 	    input = xmlNewEntityInputStream(ctxt, entity);
7883 	    if (xmlPushInput(ctxt, input) < 0) {
7884                 xmlFreeInputStream(input);
7885 		return;
7886             }
7887 
7888             entity->flags |= XML_ENT_EXPANDING;
7889 
7890 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7891                 xmlDetectEncoding(ctxt);
7892 
7893                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7894                     (IS_BLANK_CH(NXT(5)))) {
7895                     xmlParseTextDecl(ctxt);
7896                 }
7897             }
7898 	}
7899     }
7900     ctxt->hasPErefs = 1;
7901 }
7902 
7903 /**
7904  * xmlLoadEntityContent:
7905  * @ctxt:  an XML parser context
7906  * @entity: an unloaded system entity
7907  *
7908  * Load the original content of the given system entity from the
7909  * ExternalID/SystemID given. This is to be used for Included in Literal
7910  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7911  *
7912  * Returns 0 in case of success and -1 in case of failure
7913  */
7914 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7915 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7916     xmlParserInputPtr oldinput, input = NULL;
7917     xmlParserInputPtr *oldinputTab;
7918     const xmlChar *oldencoding;
7919     xmlChar *content = NULL;
7920     size_t length, i;
7921     int oldinputNr, oldinputMax;
7922     int ret = -1;
7923     int res;
7924 
7925     if ((ctxt == NULL) || (entity == NULL) ||
7926         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7927 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7928 	(entity->content != NULL)) {
7929 	xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7930 	            "xmlLoadEntityContent parameter error");
7931         return(-1);
7932     }
7933 
7934     input = xmlLoadExternalEntity((char *) entity->URI,
7935            (char *) entity->ExternalID, ctxt);
7936     if (input == NULL)
7937         return(-1);
7938 
7939     oldinput = ctxt->input;
7940     oldinputNr = ctxt->inputNr;
7941     oldinputMax = ctxt->inputMax;
7942     oldinputTab = ctxt->inputTab;
7943     oldencoding = ctxt->encoding;
7944 
7945     ctxt->input = NULL;
7946     ctxt->inputNr = 0;
7947     ctxt->inputMax = 1;
7948     ctxt->encoding = NULL;
7949     ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7950     if (ctxt->inputTab == NULL) {
7951         xmlErrMemory(ctxt);
7952         xmlFreeInputStream(input);
7953         goto error;
7954     }
7955 
7956     xmlBufResetInput(input->buf->buffer, input);
7957 
7958     inputPush(ctxt, input);
7959 
7960     xmlDetectEncoding(ctxt);
7961 
7962     /*
7963      * Parse a possible text declaration first
7964      */
7965     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7966 	xmlParseTextDecl(ctxt);
7967         /*
7968          * An XML-1.0 document can't reference an entity not XML-1.0
7969          */
7970         if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7971             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7972             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7973                            "Version mismatch between document and entity\n");
7974         }
7975     }
7976 
7977     length = input->cur - input->base;
7978     xmlBufShrink(input->buf->buffer, length);
7979     xmlSaturatedAdd(&ctxt->sizeentities, length);
7980 
7981     while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7982         ;
7983 
7984     xmlBufResetInput(input->buf->buffer, input);
7985 
7986     if (res < 0) {
7987         xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7988         goto error;
7989     }
7990 
7991     length = xmlBufUse(input->buf->buffer);
7992     content = xmlBufDetach(input->buf->buffer);
7993 
7994     if (length > INT_MAX) {
7995         xmlErrMemory(ctxt);
7996         goto error;
7997     }
7998 
7999     for (i = 0; i < length; ) {
8000         int clen = length - i;
8001         int c = xmlGetUTF8Char(content + i, &clen);
8002 
8003         if ((c < 0) || (!IS_CHAR(c))) {
8004             xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8005                               "xmlLoadEntityContent: invalid char value %d\n",
8006                               content[i]);
8007             goto error;
8008         }
8009         i += clen;
8010     }
8011 
8012     xmlSaturatedAdd(&ctxt->sizeentities, length);
8013     entity->content = content;
8014     entity->length = length;
8015     content = NULL;
8016     ret = 0;
8017 
8018 error:
8019     while (ctxt->inputNr > 0)
8020         xmlFreeInputStream(inputPop(ctxt));
8021     xmlFree(ctxt->inputTab);
8022     xmlFree((xmlChar *) ctxt->encoding);
8023 
8024     ctxt->input = oldinput;
8025     ctxt->inputNr = oldinputNr;
8026     ctxt->inputMax = oldinputMax;
8027     ctxt->inputTab = oldinputTab;
8028     ctxt->encoding = oldencoding;
8029 
8030     xmlFree(content);
8031 
8032     return(ret);
8033 }
8034 
8035 /**
8036  * xmlParseStringPEReference:
8037  * @ctxt:  an XML parser context
8038  * @str:  a pointer to an index in the string
8039  *
8040  * parse PEReference declarations
8041  *
8042  * [69] PEReference ::= '%' Name ';'
8043  *
8044  * [ WFC: No Recursion ]
8045  * A parsed entity must not contain a recursive
8046  * reference to itself, either directly or indirectly.
8047  *
8048  * [ WFC: Entity Declared ]
8049  * In a document without any DTD, a document with only an internal DTD
8050  * subset which contains no parameter entity references, or a document
8051  * with "standalone='yes'", ...  ... The declaration of a parameter
8052  * entity must precede any reference to it...
8053  *
8054  * [ VC: Entity Declared ]
8055  * In a document with an external subset or external parameter entities
8056  * with "standalone='no'", ...  ... The declaration of a parameter entity
8057  * must precede any reference to it...
8058  *
8059  * [ WFC: In DTD ]
8060  * Parameter-entity references may only appear in the DTD.
8061  * NOTE: misleading but this is handled.
8062  *
8063  * Returns the string of the entity content.
8064  *         str is updated to the current value of the index
8065  */
8066 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8067 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8068     const xmlChar *ptr;
8069     xmlChar cur;
8070     xmlChar *name;
8071     xmlEntityPtr entity = NULL;
8072 
8073     if ((str == NULL) || (*str == NULL)) return(NULL);
8074     ptr = *str;
8075     cur = *ptr;
8076     if (cur != '%')
8077         return(NULL);
8078     ptr++;
8079     name = xmlParseStringName(ctxt, &ptr);
8080     if (name == NULL) {
8081 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8082 		       "xmlParseStringPEReference: no name\n");
8083 	*str = ptr;
8084 	return(NULL);
8085     }
8086     cur = *ptr;
8087     if (cur != ';') {
8088 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8089 	xmlFree(name);
8090 	*str = ptr;
8091 	return(NULL);
8092     }
8093     ptr++;
8094 
8095     /*
8096      * Request the entity from SAX
8097      */
8098     if ((ctxt->sax != NULL) &&
8099 	(ctxt->sax->getParameterEntity != NULL))
8100 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8101     if (entity == NULL) {
8102 	/*
8103 	 * [ WFC: Entity Declared ]
8104 	 * In a document without any DTD, a document with only an
8105 	 * internal DTD subset which contains no parameter entity
8106 	 * references, or a document with "standalone='yes'", ...
8107 	 * ... The declaration of a parameter entity must precede
8108 	 * any reference to it...
8109 	 */
8110 	if ((ctxt->standalone == 1) ||
8111 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8112 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8113 		 "PEReference: %%%s; not found\n", name);
8114 	} else {
8115 	    /*
8116 	     * [ VC: Entity Declared ]
8117 	     * In a document with an external subset or external
8118 	     * parameter entities with "standalone='no'", ...
8119 	     * ... The declaration of a parameter entity must
8120 	     * precede any reference to it...
8121 	     */
8122 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8123 			  "PEReference: %%%s; not found\n",
8124 			  name, NULL);
8125 	    ctxt->valid = 0;
8126 	}
8127     } else {
8128 	/*
8129 	 * Internal checking in case the entity quest barfed
8130 	 */
8131 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8132 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8133 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8134 			  "%%%s; is not a parameter entity\n",
8135 			  name, NULL);
8136 	}
8137     }
8138     ctxt->hasPErefs = 1;
8139     xmlFree(name);
8140     *str = ptr;
8141     return(entity);
8142 }
8143 
8144 /**
8145  * xmlParseDocTypeDecl:
8146  * @ctxt:  an XML parser context
8147  *
8148  * DEPRECATED: Internal function, don't use.
8149  *
8150  * parse a DOCTYPE declaration
8151  *
8152  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8153  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8154  *
8155  * [ VC: Root Element Type ]
8156  * The Name in the document type declaration must match the element
8157  * type of the root element.
8158  */
8159 
8160 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8161 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8162     const xmlChar *name = NULL;
8163     xmlChar *ExternalID = NULL;
8164     xmlChar *URI = NULL;
8165 
8166     /*
8167      * We know that '<!DOCTYPE' has been detected.
8168      */
8169     SKIP(9);
8170 
8171     SKIP_BLANKS;
8172 
8173     /*
8174      * Parse the DOCTYPE name.
8175      */
8176     name = xmlParseName(ctxt);
8177     if (name == NULL) {
8178 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8179 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8180     }
8181     ctxt->intSubName = name;
8182 
8183     SKIP_BLANKS;
8184 
8185     /*
8186      * Check for SystemID and ExternalID
8187      */
8188     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8189 
8190     if ((URI != NULL) || (ExternalID != NULL)) {
8191         ctxt->hasExternalSubset = 1;
8192     }
8193     ctxt->extSubURI = URI;
8194     ctxt->extSubSystem = ExternalID;
8195 
8196     SKIP_BLANKS;
8197 
8198     /*
8199      * Create and update the internal subset.
8200      */
8201     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8202 	(!ctxt->disableSAX))
8203 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8204 
8205     /*
8206      * Is there any internal subset declarations ?
8207      * they are handled separately in xmlParseInternalSubset()
8208      */
8209     if (RAW == '[')
8210 	return;
8211 
8212     /*
8213      * We should be at the end of the DOCTYPE declaration.
8214      */
8215     if (RAW != '>') {
8216 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8217     }
8218     NEXT;
8219 }
8220 
8221 /**
8222  * xmlParseInternalSubset:
8223  * @ctxt:  an XML parser context
8224  *
8225  * parse the internal subset declaration
8226  *
8227  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8228  */
8229 
8230 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8231 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8232     /*
8233      * Is there any DTD definition ?
8234      */
8235     if (RAW == '[') {
8236         int oldInputNr = ctxt->inputNr;
8237 
8238         NEXT;
8239 	/*
8240 	 * Parse the succession of Markup declarations and
8241 	 * PEReferences.
8242 	 * Subsequence (markupdecl | PEReference | S)*
8243 	 */
8244 	SKIP_BLANKS;
8245 	while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8246                (PARSER_STOPPED(ctxt) == 0)) {
8247 
8248             /*
8249              * Conditional sections are allowed from external entities included
8250              * by PE References in the internal subset.
8251              */
8252             if ((PARSER_EXTERNAL(ctxt)) &&
8253                 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8254                 xmlParseConditionalSections(ctxt);
8255             } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8256 	        xmlParseMarkupDecl(ctxt);
8257             } else if (RAW == '%') {
8258 	        xmlParsePEReference(ctxt);
8259             } else {
8260 		xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8261                 break;
8262             }
8263 	    SKIP_BLANKS_PE;
8264             SHRINK;
8265             GROW;
8266 	}
8267 
8268         while (ctxt->inputNr > oldInputNr)
8269             xmlPopPE(ctxt);
8270 
8271 	if (RAW == ']') {
8272 	    NEXT;
8273 	    SKIP_BLANKS;
8274 	}
8275     }
8276 
8277     /*
8278      * We should be at the end of the DOCTYPE declaration.
8279      */
8280     if ((ctxt->wellFormed) && (RAW != '>')) {
8281 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8282 	return;
8283     }
8284     NEXT;
8285 }
8286 
8287 #ifdef LIBXML_SAX1_ENABLED
8288 /**
8289  * xmlParseAttribute:
8290  * @ctxt:  an XML parser context
8291  * @value:  a xmlChar ** used to store the value of the attribute
8292  *
8293  * DEPRECATED: Internal function, don't use.
8294  *
8295  * parse an attribute
8296  *
8297  * [41] Attribute ::= Name Eq AttValue
8298  *
8299  * [ WFC: No External Entity References ]
8300  * Attribute values cannot contain direct or indirect entity references
8301  * to external entities.
8302  *
8303  * [ WFC: No < in Attribute Values ]
8304  * The replacement text of any entity referred to directly or indirectly in
8305  * an attribute value (other than "&lt;") must not contain a <.
8306  *
8307  * [ VC: Attribute Value Type ]
8308  * The attribute must have been declared; the value must be of the type
8309  * declared for it.
8310  *
8311  * [25] Eq ::= S? '=' S?
8312  *
8313  * With namespace:
8314  *
8315  * [NS 11] Attribute ::= QName Eq AttValue
8316  *
8317  * Also the case QName == xmlns:??? is handled independently as a namespace
8318  * definition.
8319  *
8320  * Returns the attribute name, and the value in *value.
8321  */
8322 
8323 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8324 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8325     const xmlChar *name;
8326     xmlChar *val;
8327 
8328     *value = NULL;
8329     GROW;
8330     name = xmlParseName(ctxt);
8331     if (name == NULL) {
8332 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8333 	               "error parsing attribute name\n");
8334         return(NULL);
8335     }
8336 
8337     /*
8338      * read the value
8339      */
8340     SKIP_BLANKS;
8341     if (RAW == '=') {
8342         NEXT;
8343 	SKIP_BLANKS;
8344 	val = xmlParseAttValue(ctxt);
8345     } else {
8346 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8347 	       "Specification mandates value for attribute %s\n", name);
8348 	return(name);
8349     }
8350 
8351     /*
8352      * Check that xml:lang conforms to the specification
8353      * No more registered as an error, just generate a warning now
8354      * since this was deprecated in XML second edition
8355      */
8356     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8357 	if (!xmlCheckLanguageID(val)) {
8358 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8359 		          "Malformed value for xml:lang : %s\n",
8360 			  val, NULL);
8361 	}
8362     }
8363 
8364     /*
8365      * Check that xml:space conforms to the specification
8366      */
8367     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8368 	if (xmlStrEqual(val, BAD_CAST "default"))
8369 	    *(ctxt->space) = 0;
8370 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8371 	    *(ctxt->space) = 1;
8372 	else {
8373 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8374 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8375                                  val, NULL);
8376 	}
8377     }
8378 
8379     *value = val;
8380     return(name);
8381 }
8382 
8383 /**
8384  * xmlParseStartTag:
8385  * @ctxt:  an XML parser context
8386  *
8387  * DEPRECATED: Internal function, don't use.
8388  *
8389  * Parse a start tag. Always consumes '<'.
8390  *
8391  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8392  *
8393  * [ WFC: Unique Att Spec ]
8394  * No attribute name may appear more than once in the same start-tag or
8395  * empty-element tag.
8396  *
8397  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8398  *
8399  * [ WFC: Unique Att Spec ]
8400  * No attribute name may appear more than once in the same start-tag or
8401  * empty-element tag.
8402  *
8403  * With namespace:
8404  *
8405  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8406  *
8407  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8408  *
8409  * Returns the element name parsed
8410  */
8411 
8412 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8413 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8414     const xmlChar *name;
8415     const xmlChar *attname;
8416     xmlChar *attvalue;
8417     const xmlChar **atts = ctxt->atts;
8418     int nbatts = 0;
8419     int maxatts = ctxt->maxatts;
8420     int i;
8421 
8422     if (RAW != '<') return(NULL);
8423     NEXT1;
8424 
8425     name = xmlParseName(ctxt);
8426     if (name == NULL) {
8427 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8428 	     "xmlParseStartTag: invalid element name\n");
8429         return(NULL);
8430     }
8431 
8432     /*
8433      * Now parse the attributes, it ends up with the ending
8434      *
8435      * (S Attribute)* S?
8436      */
8437     SKIP_BLANKS;
8438     GROW;
8439 
8440     while (((RAW != '>') &&
8441 	   ((RAW != '/') || (NXT(1) != '>')) &&
8442 	   (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8443 	attname = xmlParseAttribute(ctxt, &attvalue);
8444         if (attname == NULL)
8445 	    break;
8446         if (attvalue != NULL) {
8447 	    /*
8448 	     * [ WFC: Unique Att Spec ]
8449 	     * No attribute name may appear more than once in the same
8450 	     * start-tag or empty-element tag.
8451 	     */
8452 	    for (i = 0; i < nbatts;i += 2) {
8453 	        if (xmlStrEqual(atts[i], attname)) {
8454 		    xmlErrAttributeDup(ctxt, NULL, attname);
8455 		    xmlFree(attvalue);
8456 		    goto failed;
8457 		}
8458 	    }
8459 	    /*
8460 	     * Add the pair to atts
8461 	     */
8462 	    if (atts == NULL) {
8463 	        maxatts = 22; /* allow for 10 attrs by default */
8464 	        atts = (const xmlChar **)
8465 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8466 		if (atts == NULL) {
8467 		    xmlErrMemory(ctxt);
8468 		    if (attvalue != NULL)
8469 			xmlFree(attvalue);
8470 		    goto failed;
8471 		}
8472 		ctxt->atts = atts;
8473 		ctxt->maxatts = maxatts;
8474 	    } else if (nbatts + 4 > maxatts) {
8475 	        const xmlChar **n;
8476 
8477 	        maxatts *= 2;
8478 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8479 					     maxatts * sizeof(const xmlChar *));
8480 		if (n == NULL) {
8481 		    xmlErrMemory(ctxt);
8482 		    if (attvalue != NULL)
8483 			xmlFree(attvalue);
8484 		    goto failed;
8485 		}
8486 		atts = n;
8487 		ctxt->atts = atts;
8488 		ctxt->maxatts = maxatts;
8489 	    }
8490 	    atts[nbatts++] = attname;
8491 	    atts[nbatts++] = attvalue;
8492 	    atts[nbatts] = NULL;
8493 	    atts[nbatts + 1] = NULL;
8494 	} else {
8495 	    if (attvalue != NULL)
8496 		xmlFree(attvalue);
8497 	}
8498 
8499 failed:
8500 
8501 	GROW
8502 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8503 	    break;
8504 	if (SKIP_BLANKS == 0) {
8505 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8506 			   "attributes construct error\n");
8507 	}
8508 	SHRINK;
8509         GROW;
8510     }
8511 
8512     /*
8513      * SAX: Start of Element !
8514      */
8515     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8516 	(!ctxt->disableSAX)) {
8517 	if (nbatts > 0)
8518 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8519 	else
8520 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8521     }
8522 
8523     if (atts != NULL) {
8524         /* Free only the content strings */
8525         for (i = 1;i < nbatts;i+=2)
8526 	    if (atts[i] != NULL)
8527 	       xmlFree((xmlChar *) atts[i]);
8528     }
8529     return(name);
8530 }
8531 
8532 /**
8533  * xmlParseEndTag1:
8534  * @ctxt:  an XML parser context
8535  * @line:  line of the start tag
8536  * @nsNr:  number of namespaces on the start tag
8537  *
8538  * Parse an end tag. Always consumes '</'.
8539  *
8540  * [42] ETag ::= '</' Name S? '>'
8541  *
8542  * With namespace
8543  *
8544  * [NS 9] ETag ::= '</' QName S? '>'
8545  */
8546 
8547 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8548 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8549     const xmlChar *name;
8550 
8551     GROW;
8552     if ((RAW != '<') || (NXT(1) != '/')) {
8553 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8554 		       "xmlParseEndTag: '</' not found\n");
8555 	return;
8556     }
8557     SKIP(2);
8558 
8559     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8560 
8561     /*
8562      * We should definitely be at the ending "S? '>'" part
8563      */
8564     GROW;
8565     SKIP_BLANKS;
8566     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8567 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8568     } else
8569 	NEXT1;
8570 
8571     /*
8572      * [ WFC: Element Type Match ]
8573      * The Name in an element's end-tag must match the element type in the
8574      * start-tag.
8575      *
8576      */
8577     if (name != (xmlChar*)1) {
8578         if (name == NULL) name = BAD_CAST "unparsable";
8579         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8580 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8581 		                ctxt->name, line, name);
8582     }
8583 
8584     /*
8585      * SAX: End of Tag
8586      */
8587     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8588 	(!ctxt->disableSAX))
8589         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8590 
8591     namePop(ctxt);
8592     spacePop(ctxt);
8593     return;
8594 }
8595 
8596 /**
8597  * xmlParseEndTag:
8598  * @ctxt:  an XML parser context
8599  *
8600  * DEPRECATED: Internal function, don't use.
8601  *
8602  * parse an end of tag
8603  *
8604  * [42] ETag ::= '</' Name S? '>'
8605  *
8606  * With namespace
8607  *
8608  * [NS 9] ETag ::= '</' QName S? '>'
8609  */
8610 
8611 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8612 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8613     xmlParseEndTag1(ctxt, 0);
8614 }
8615 #endif /* LIBXML_SAX1_ENABLED */
8616 
8617 /************************************************************************
8618  *									*
8619  *		      SAX 2 specific operations				*
8620  *									*
8621  ************************************************************************/
8622 
8623 /**
8624  * xmlParseQNameHashed:
8625  * @ctxt:  an XML parser context
8626  * @prefix:  pointer to store the prefix part
8627  *
8628  * parse an XML Namespace QName
8629  *
8630  * [6]  QName  ::= (Prefix ':')? LocalPart
8631  * [7]  Prefix  ::= NCName
8632  * [8]  LocalPart  ::= NCName
8633  *
8634  * Returns the Name parsed or NULL
8635  */
8636 
8637 static xmlHashedString
xmlParseQNameHashed(xmlParserCtxtPtr ctxt,xmlHashedString * prefix)8638 xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8639     xmlHashedString l, p;
8640     int start, isNCName = 0;
8641 
8642     l.name = NULL;
8643     p.name = NULL;
8644 
8645     GROW;
8646     start = CUR_PTR - BASE_PTR;
8647 
8648     l = xmlParseNCName(ctxt);
8649     if (l.name != NULL) {
8650         isNCName = 1;
8651         if (CUR == ':') {
8652             NEXT;
8653             p = l;
8654             l = xmlParseNCName(ctxt);
8655         }
8656     }
8657     if ((l.name == NULL) || (CUR == ':')) {
8658         xmlChar *tmp;
8659 
8660         l.name = NULL;
8661         p.name = NULL;
8662         if ((isNCName == 0) && (CUR != ':'))
8663             return(l);
8664         tmp = xmlParseNmtoken(ctxt);
8665         if (tmp != NULL)
8666             xmlFree(tmp);
8667         l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8668                                 CUR_PTR - (BASE_PTR + start));
8669         if (l.name == NULL) {
8670             xmlErrMemory(ctxt);
8671             return(l);
8672         }
8673         xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8674                  "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8675     }
8676 
8677     *prefix = p;
8678     return(l);
8679 }
8680 
8681 /**
8682  * xmlParseQName:
8683  * @ctxt:  an XML parser context
8684  * @prefix:  pointer to store the prefix part
8685  *
8686  * parse an XML Namespace QName
8687  *
8688  * [6]  QName  ::= (Prefix ':')? LocalPart
8689  * [7]  Prefix  ::= NCName
8690  * [8]  LocalPart  ::= NCName
8691  *
8692  * Returns the Name parsed or NULL
8693  */
8694 
8695 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8696 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8697     xmlHashedString n, p;
8698 
8699     n = xmlParseQNameHashed(ctxt, &p);
8700     if (n.name == NULL)
8701         return(NULL);
8702     *prefix = p.name;
8703     return(n.name);
8704 }
8705 
8706 /**
8707  * xmlParseQNameAndCompare:
8708  * @ctxt:  an XML parser context
8709  * @name:  the localname
8710  * @prefix:  the prefix, if any.
8711  *
8712  * parse an XML name and compares for match
8713  * (specialized for endtag parsing)
8714  *
8715  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8716  * and the name for mismatch
8717  */
8718 
8719 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8720 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8721                         xmlChar const *prefix) {
8722     const xmlChar *cmp;
8723     const xmlChar *in;
8724     const xmlChar *ret;
8725     const xmlChar *prefix2;
8726 
8727     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8728 
8729     GROW;
8730     in = ctxt->input->cur;
8731 
8732     cmp = prefix;
8733     while (*in != 0 && *in == *cmp) {
8734 	++in;
8735 	++cmp;
8736     }
8737     if ((*cmp == 0) && (*in == ':')) {
8738         in++;
8739 	cmp = name;
8740 	while (*in != 0 && *in == *cmp) {
8741 	    ++in;
8742 	    ++cmp;
8743 	}
8744 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8745 	    /* success */
8746             ctxt->input->col += in - ctxt->input->cur;
8747 	    ctxt->input->cur = in;
8748 	    return((const xmlChar*) 1);
8749 	}
8750     }
8751     /*
8752      * all strings coms from the dictionary, equality can be done directly
8753      */
8754     ret = xmlParseQName (ctxt, &prefix2);
8755     if (ret == NULL)
8756         return(NULL);
8757     if ((ret == name) && (prefix == prefix2))
8758 	return((const xmlChar*) 1);
8759     return ret;
8760 }
8761 
8762 /**
8763  * xmlParseAttribute2:
8764  * @ctxt:  an XML parser context
8765  * @pref:  the element prefix
8766  * @elem:  the element name
8767  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8768  * @value:  a xmlChar ** used to store the value of the attribute
8769  * @len:  an int * to save the length of the attribute
8770  * @alloc:  an int * to indicate if the attribute was allocated
8771  *
8772  * parse an attribute in the new SAX2 framework.
8773  *
8774  * Returns the attribute name, and the value in *value, .
8775  */
8776 
8777 static xmlHashedString
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,xmlHashedString * hprefix,xmlChar ** value,int * len,int * alloc)8778 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8779                    const xmlChar * pref, const xmlChar * elem,
8780                    xmlHashedString * hprefix, xmlChar ** value,
8781                    int *len, int *alloc)
8782 {
8783     xmlHashedString hname;
8784     const xmlChar *prefix, *name;
8785     xmlChar *val = NULL, *internal_val = NULL;
8786     int normalize = 0;
8787 
8788     *value = NULL;
8789     GROW;
8790     hname = xmlParseQNameHashed(ctxt, hprefix);
8791     if (hname.name == NULL) {
8792         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8793                        "error parsing attribute name\n");
8794         return(hname);
8795     }
8796     name = hname.name;
8797     if (hprefix->name != NULL)
8798         prefix = hprefix->name;
8799     else
8800         prefix = NULL;
8801 
8802     /*
8803      * get the type if needed
8804      */
8805     if (ctxt->attsSpecial != NULL) {
8806         int type;
8807 
8808         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8809                                                  pref, elem,
8810                                                  prefix, name);
8811         if (type != 0)
8812             normalize = 1;
8813     }
8814 
8815     /*
8816      * read the value
8817      */
8818     SKIP_BLANKS;
8819     if (RAW == '=') {
8820         NEXT;
8821         SKIP_BLANKS;
8822         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8823         if (val == NULL)
8824             goto error;
8825     } else {
8826         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8827                           "Specification mandates value for attribute %s\n",
8828                           name);
8829         goto error;
8830     }
8831 
8832     if (prefix == ctxt->str_xml) {
8833         /*
8834          * Check that xml:lang conforms to the specification
8835          * No more registered as an error, just generate a warning now
8836          * since this was deprecated in XML second edition
8837          */
8838         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8839             internal_val = xmlStrndup(val, *len);
8840             if (internal_val == NULL)
8841                 goto mem_error;
8842             if (!xmlCheckLanguageID(internal_val)) {
8843                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8844                               "Malformed value for xml:lang : %s\n",
8845                               internal_val, NULL);
8846             }
8847         }
8848 
8849         /*
8850          * Check that xml:space conforms to the specification
8851          */
8852         if (xmlStrEqual(name, BAD_CAST "space")) {
8853             internal_val = xmlStrndup(val, *len);
8854             if (internal_val == NULL)
8855                 goto mem_error;
8856             if (xmlStrEqual(internal_val, BAD_CAST "default"))
8857                 *(ctxt->space) = 0;
8858             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8859                 *(ctxt->space) = 1;
8860             else {
8861                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8862                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8863                               internal_val, NULL);
8864             }
8865         }
8866         if (internal_val) {
8867             xmlFree(internal_val);
8868         }
8869     }
8870 
8871     *value = val;
8872     return (hname);
8873 
8874 mem_error:
8875     xmlErrMemory(ctxt);
8876 error:
8877     if ((val != NULL) && (*alloc != 0))
8878         xmlFree(val);
8879     return(hname);
8880 }
8881 
8882 /**
8883  * xmlAttrHashInsert:
8884  * @ctxt: parser context
8885  * @size: size of the hash table
8886  * @name: attribute name
8887  * @uri: namespace uri
8888  * @hashValue: combined hash value of name and uri
8889  * @aindex: attribute index (this is a multiple of 5)
8890  *
8891  * Inserts a new attribute into the hash table.
8892  *
8893  * Returns INT_MAX if no existing attribute was found, the attribute
8894  * index if an attribute was found, -1 if a memory allocation failed.
8895  */
8896 static int
xmlAttrHashInsert(xmlParserCtxtPtr ctxt,unsigned size,const xmlChar * name,const xmlChar * uri,unsigned hashValue,int aindex)8897 xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8898                   const xmlChar *uri, unsigned hashValue, int aindex) {
8899     xmlAttrHashBucket *table = ctxt->attrHash;
8900     xmlAttrHashBucket *bucket;
8901     unsigned hindex;
8902 
8903     hindex = hashValue & (size - 1);
8904     bucket = &table[hindex];
8905 
8906     while (bucket->index >= 0) {
8907         const xmlChar **atts = &ctxt->atts[bucket->index];
8908 
8909         if (name == atts[0]) {
8910             int nsIndex = (int) (ptrdiff_t) atts[2];
8911 
8912             if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8913                 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
8914                 (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8915                 return(bucket->index);
8916         }
8917 
8918         hindex++;
8919         bucket++;
8920         if (hindex >= size) {
8921             hindex = 0;
8922             bucket = table;
8923         }
8924     }
8925 
8926     bucket->index = aindex;
8927 
8928     return(INT_MAX);
8929 }
8930 
8931 /**
8932  * xmlParseStartTag2:
8933  * @ctxt:  an XML parser context
8934  *
8935  * Parse a start tag. Always consumes '<'.
8936  *
8937  * This routine is called when running SAX2 parsing
8938  *
8939  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8940  *
8941  * [ WFC: Unique Att Spec ]
8942  * No attribute name may appear more than once in the same start-tag or
8943  * empty-element tag.
8944  *
8945  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8946  *
8947  * [ WFC: Unique Att Spec ]
8948  * No attribute name may appear more than once in the same start-tag or
8949  * empty-element tag.
8950  *
8951  * With namespace:
8952  *
8953  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8954  *
8955  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8956  *
8957  * Returns the element name parsed
8958  */
8959 
8960 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * nbNsPtr)8961 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8962                   const xmlChar **URI, int *nbNsPtr) {
8963     xmlHashedString hlocalname;
8964     xmlHashedString hprefix;
8965     xmlHashedString hattname;
8966     xmlHashedString haprefix;
8967     const xmlChar *localname;
8968     const xmlChar *prefix;
8969     const xmlChar *attname;
8970     const xmlChar *aprefix;
8971     const xmlChar *uri;
8972     xmlChar *attvalue = NULL;
8973     const xmlChar **atts = ctxt->atts;
8974     unsigned attrHashSize = 0;
8975     int maxatts = ctxt->maxatts;
8976     int nratts, nbatts, nbdef;
8977     int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8978     int alloc = 0;
8979 
8980     if (RAW != '<') return(NULL);
8981     NEXT1;
8982 
8983     nbatts = 0;
8984     nratts = 0;
8985     nbdef = 0;
8986     nbNs = 0;
8987     nbTotalDef = 0;
8988     attval = 0;
8989 
8990     if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8991         xmlErrMemory(ctxt);
8992         return(NULL);
8993     }
8994 
8995     hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8996     if (hlocalname.name == NULL) {
8997 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8998 		       "StartTag: invalid element name\n");
8999         return(NULL);
9000     }
9001     localname = hlocalname.name;
9002     prefix = hprefix.name;
9003 
9004     /*
9005      * Now parse the attributes, it ends up with the ending
9006      *
9007      * (S Attribute)* S?
9008      */
9009     SKIP_BLANKS;
9010     GROW;
9011 
9012     /*
9013      * The ctxt->atts array will be ultimately passed to the SAX callback
9014      * containing five xmlChar pointers for each attribute:
9015      *
9016      * [0] attribute name
9017      * [1] attribute prefix
9018      * [2] namespace URI
9019      * [3] attribute value
9020      * [4] end of attribute value
9021      *
9022      * To save memory, we reuse this array temporarily and store integers
9023      * in these pointer variables.
9024      *
9025      * [0] attribute name
9026      * [1] attribute prefix
9027      * [2] hash value of attribute prefix, and later namespace index
9028      * [3] for non-allocated values: ptrdiff_t offset into input buffer
9029      * [4] for non-allocated values: ptrdiff_t offset into input buffer
9030      *
9031      * The ctxt->attallocs array contains an additional unsigned int for
9032      * each attribute, containing the hash value of the attribute name
9033      * and the alloc flag in bit 31.
9034      */
9035 
9036     while (((RAW != '>') &&
9037 	   ((RAW != '/') || (NXT(1) != '>')) &&
9038 	   (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9039 	int len = -1;
9040 
9041 	hattname = xmlParseAttribute2(ctxt, prefix, localname,
9042                                           &haprefix, &attvalue, &len,
9043                                           &alloc);
9044         if (hattname.name == NULL)
9045 	    break;
9046         if (attvalue == NULL)
9047             goto next_attr;
9048         attname = hattname.name;
9049         aprefix = haprefix.name;
9050 	if (len < 0) len = xmlStrlen(attvalue);
9051 
9052         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9053             xmlHashedString huri;
9054             xmlURIPtr parsedUri;
9055 
9056             huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9057             uri = huri.name;
9058             if (uri == NULL) {
9059                 xmlErrMemory(ctxt);
9060                 goto next_attr;
9061             }
9062             if (*uri != 0) {
9063                 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9064                     xmlErrMemory(ctxt);
9065                     goto next_attr;
9066                 }
9067                 if (parsedUri == NULL) {
9068                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9069                              "xmlns: '%s' is not a valid URI\n",
9070                                        uri, NULL, NULL);
9071                 } else {
9072                     if (parsedUri->scheme == NULL) {
9073                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9074                                   "xmlns: URI %s is not absolute\n",
9075                                   uri, NULL, NULL);
9076                     }
9077                     xmlFreeURI(parsedUri);
9078                 }
9079                 if (uri == ctxt->str_xml_ns) {
9080                     if (attname != ctxt->str_xml) {
9081                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9082                      "xml namespace URI cannot be the default namespace\n",
9083                                  NULL, NULL, NULL);
9084                     }
9085                     goto next_attr;
9086                 }
9087                 if ((len == 29) &&
9088                     (xmlStrEqual(uri,
9089                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9090                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9091                          "reuse of the xmlns namespace name is forbidden\n",
9092                              NULL, NULL, NULL);
9093                     goto next_attr;
9094                 }
9095             }
9096 
9097             if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9098                 nbNs++;
9099         } else if (aprefix == ctxt->str_xmlns) {
9100             xmlHashedString huri;
9101             xmlURIPtr parsedUri;
9102 
9103             huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9104             uri = huri.name;
9105             if (uri == NULL) {
9106                 xmlErrMemory(ctxt);
9107                 goto next_attr;
9108             }
9109 
9110             if (attname == ctxt->str_xml) {
9111                 if (uri != ctxt->str_xml_ns) {
9112                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9113                              "xml namespace prefix mapped to wrong URI\n",
9114                              NULL, NULL, NULL);
9115                 }
9116                 /*
9117                  * Do not keep a namespace definition node
9118                  */
9119                 goto next_attr;
9120             }
9121             if (uri == ctxt->str_xml_ns) {
9122                 if (attname != ctxt->str_xml) {
9123                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9124                              "xml namespace URI mapped to wrong prefix\n",
9125                              NULL, NULL, NULL);
9126                 }
9127                 goto next_attr;
9128             }
9129             if (attname == ctxt->str_xmlns) {
9130                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9131                          "redefinition of the xmlns prefix is forbidden\n",
9132                          NULL, NULL, NULL);
9133                 goto next_attr;
9134             }
9135             if ((len == 29) &&
9136                 (xmlStrEqual(uri,
9137                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9138                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139                          "reuse of the xmlns namespace name is forbidden\n",
9140                          NULL, NULL, NULL);
9141                 goto next_attr;
9142             }
9143             if ((uri == NULL) || (uri[0] == 0)) {
9144                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9145                          "xmlns:%s: Empty XML namespace is not allowed\n",
9146                               attname, NULL, NULL);
9147                 goto next_attr;
9148             } else {
9149                 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9150                     xmlErrMemory(ctxt);
9151                     goto next_attr;
9152                 }
9153                 if (parsedUri == NULL) {
9154                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9155                          "xmlns:%s: '%s' is not a valid URI\n",
9156                                        attname, uri, NULL);
9157                 } else {
9158                     if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9159                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9160                                   "xmlns:%s: URI %s is not absolute\n",
9161                                   attname, uri, NULL);
9162                     }
9163                     xmlFreeURI(parsedUri);
9164                 }
9165             }
9166 
9167             if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9168                 nbNs++;
9169         } else {
9170             /*
9171              * Populate attributes array, see above for repurposing
9172              * of xmlChar pointers.
9173              */
9174             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9175                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9176                     goto next_attr;
9177                 }
9178                 maxatts = ctxt->maxatts;
9179                 atts = ctxt->atts;
9180             }
9181             ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9182                                         ((unsigned) alloc << 31);
9183             atts[nbatts++] = attname;
9184             atts[nbatts++] = aprefix;
9185             atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9186             if (alloc) {
9187                 atts[nbatts++] = attvalue;
9188                 attvalue += len;
9189                 atts[nbatts++] = attvalue;
9190             } else {
9191                 /*
9192                  * attvalue points into the input buffer which can be
9193                  * reallocated. Store differences to input->base instead.
9194                  * The pointers will be reconstructed later.
9195                  */
9196                 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9197                 attvalue += len;
9198                 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9199             }
9200             /*
9201              * tag if some deallocation is needed
9202              */
9203             if (alloc != 0) attval = 1;
9204             attvalue = NULL; /* moved into atts */
9205         }
9206 
9207 next_attr:
9208         if ((attvalue != NULL) && (alloc != 0)) {
9209             xmlFree(attvalue);
9210             attvalue = NULL;
9211         }
9212 
9213 	GROW
9214 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9215 	    break;
9216 	if (SKIP_BLANKS == 0) {
9217 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9218 			   "attributes construct error\n");
9219 	    break;
9220 	}
9221         GROW;
9222     }
9223 
9224     /*
9225      * Namespaces from default attributes
9226      */
9227     if (ctxt->attsDefault != NULL) {
9228         xmlDefAttrsPtr defaults;
9229 
9230 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9231 	if (defaults != NULL) {
9232 	    for (i = 0; i < defaults->nbAttrs; i++) {
9233                 xmlDefAttr *attr = &defaults->attrs[i];
9234 
9235 	        attname = attr->name.name;
9236 		aprefix = attr->prefix.name;
9237 
9238 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9239                     xmlParserEntityCheck(ctxt, attr->expandedSize);
9240 
9241                     if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9242                         nbNs++;
9243 		} else if (aprefix == ctxt->str_xmlns) {
9244                     xmlParserEntityCheck(ctxt, attr->expandedSize);
9245 
9246                     if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9247                                       NULL, 1) > 0)
9248                         nbNs++;
9249 		} else {
9250                     nbTotalDef += 1;
9251                 }
9252 	    }
9253 	}
9254     }
9255 
9256     /*
9257      * Resolve attribute namespaces
9258      */
9259     for (i = 0; i < nbatts; i += 5) {
9260         attname = atts[i];
9261         aprefix = atts[i+1];
9262 
9263         /*
9264 	* The default namespace does not apply to attribute names.
9265 	*/
9266 	if (aprefix == NULL) {
9267             nsIndex = NS_INDEX_EMPTY;
9268         } else if (aprefix == ctxt->str_xml) {
9269             nsIndex = NS_INDEX_XML;
9270         } else {
9271             haprefix.name = aprefix;
9272             haprefix.hashValue = (size_t) atts[i+2];
9273             nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9274 
9275 	    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9276                 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9277 		    "Namespace prefix %s for %s on %s is not defined\n",
9278 		    aprefix, attname, localname);
9279                 nsIndex = NS_INDEX_EMPTY;
9280             }
9281         }
9282 
9283         atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9284     }
9285 
9286     /*
9287      * Maximum number of attributes including default attributes.
9288      */
9289     maxAtts = nratts + nbTotalDef;
9290 
9291     /*
9292      * Verify that attribute names are unique.
9293      */
9294     if (maxAtts > 1) {
9295         attrHashSize = 4;
9296         while (attrHashSize / 2 < (unsigned) maxAtts)
9297             attrHashSize *= 2;
9298 
9299         if (attrHashSize > ctxt->attrHashMax) {
9300             xmlAttrHashBucket *tmp;
9301 
9302             tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9303             if (tmp == NULL) {
9304                 xmlErrMemory(ctxt);
9305                 goto done;
9306             }
9307 
9308             ctxt->attrHash = tmp;
9309             ctxt->attrHashMax = attrHashSize;
9310         }
9311 
9312         memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9313 
9314         for (i = 0, j = 0; j < nratts; i += 5, j++) {
9315             const xmlChar *nsuri;
9316             unsigned hashValue, nameHashValue, uriHashValue;
9317             int res;
9318 
9319             attname = atts[i];
9320             aprefix = atts[i+1];
9321             nsIndex = (ptrdiff_t) atts[i+2];
9322             /* Hash values always have bit 31 set, see dict.c */
9323             nameHashValue = ctxt->attallocs[j] | 0x80000000;
9324 
9325             if (nsIndex == NS_INDEX_EMPTY) {
9326                 /*
9327                  * Prefix with empty namespace means an undeclared
9328                  * prefix which was already reported above.
9329                  */
9330                 if (aprefix != NULL)
9331                     continue;
9332                 nsuri = NULL;
9333                 uriHashValue = URI_HASH_EMPTY;
9334             } else if (nsIndex == NS_INDEX_XML) {
9335                 nsuri = ctxt->str_xml_ns;
9336                 uriHashValue = URI_HASH_XML;
9337             } else {
9338                 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9339                 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9340             }
9341 
9342             hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9343             res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9344                                     hashValue, i);
9345             if (res < 0)
9346                 continue;
9347 
9348             /*
9349              * [ WFC: Unique Att Spec ]
9350              * No attribute name may appear more than once in the same
9351              * start-tag or empty-element tag.
9352              * As extended by the Namespace in XML REC.
9353              */
9354             if (res < INT_MAX) {
9355                 if (aprefix == atts[res+1]) {
9356                     xmlErrAttributeDup(ctxt, aprefix, attname);
9357                 } else {
9358                     xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9359                              "Namespaced Attribute %s in '%s' redefined\n",
9360                              attname, nsuri, NULL);
9361                 }
9362             }
9363         }
9364     }
9365 
9366     /*
9367      * Default attributes
9368      */
9369     if (ctxt->attsDefault != NULL) {
9370         xmlDefAttrsPtr defaults;
9371 
9372 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9373 	if (defaults != NULL) {
9374 	    for (i = 0; i < defaults->nbAttrs; i++) {
9375                 xmlDefAttr *attr = &defaults->attrs[i];
9376                 const xmlChar *nsuri;
9377                 unsigned hashValue, uriHashValue;
9378                 int res;
9379 
9380 	        attname = attr->name.name;
9381 		aprefix = attr->prefix.name;
9382 
9383 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9384                     continue;
9385 		if (aprefix == ctxt->str_xmlns)
9386                     continue;
9387 
9388                 if (aprefix == NULL) {
9389                     nsIndex = NS_INDEX_EMPTY;
9390                     nsuri = NULL;
9391                     uriHashValue = URI_HASH_EMPTY;
9392                 } if (aprefix == ctxt->str_xml) {
9393                     nsIndex = NS_INDEX_XML;
9394                     nsuri = ctxt->str_xml_ns;
9395                     uriHashValue = URI_HASH_XML;
9396                 } else if (aprefix != NULL) {
9397                     nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9398                     if ((nsIndex == INT_MAX) ||
9399                         (nsIndex < ctxt->nsdb->minNsIndex)) {
9400                         xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9401                                  "Namespace prefix %s for %s on %s is not "
9402                                  "defined\n",
9403                                  aprefix, attname, localname);
9404                         nsIndex = NS_INDEX_EMPTY;
9405                         nsuri = NULL;
9406                         uriHashValue = URI_HASH_EMPTY;
9407                     } else {
9408                         nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9409                         uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9410                     }
9411                 }
9412 
9413                 /*
9414                  * Check whether the attribute exists
9415                  */
9416                 if (maxAtts > 1) {
9417                     hashValue = xmlDictCombineHash(attr->name.hashValue,
9418                                                    uriHashValue);
9419                     res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9420                                             hashValue, nbatts);
9421                     if (res < 0)
9422                         continue;
9423                     if (res < INT_MAX) {
9424                         if (aprefix == atts[res+1])
9425                             continue;
9426                         xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9427                                  "Namespaced Attribute %s in '%s' redefined\n",
9428                                  attname, nsuri, NULL);
9429                     }
9430                 }
9431 
9432                 xmlParserEntityCheck(ctxt, attr->expandedSize);
9433 
9434                 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9435                     if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9436                         localname = NULL;
9437                         goto done;
9438                     }
9439                     maxatts = ctxt->maxatts;
9440                     atts = ctxt->atts;
9441                 }
9442 
9443                 atts[nbatts++] = attname;
9444                 atts[nbatts++] = aprefix;
9445                 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9446                 atts[nbatts++] = attr->value.name;
9447                 atts[nbatts++] = attr->valueEnd;
9448                 if ((ctxt->standalone == 1) && (attr->external != 0)) {
9449                     xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9450                             "standalone: attribute %s on %s defaulted "
9451                             "from external subset\n",
9452                             attname, localname);
9453                 }
9454                 nbdef++;
9455 	    }
9456 	}
9457     }
9458 
9459     /*
9460      * Reconstruct attribute pointers
9461      */
9462     for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9463         /* namespace URI */
9464         nsIndex = (ptrdiff_t) atts[i+2];
9465         if (nsIndex == INT_MAX)
9466             atts[i+2] = NULL;
9467         else if (nsIndex == INT_MAX - 1)
9468             atts[i+2] = ctxt->str_xml_ns;
9469         else
9470             atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9471 
9472         if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9473             atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
9474             atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
9475         }
9476     }
9477 
9478     uri = xmlParserNsLookupUri(ctxt, &hprefix);
9479     if ((prefix != NULL) && (uri == NULL)) {
9480 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9481 	         "Namespace prefix %s on %s is not defined\n",
9482 		 prefix, localname, NULL);
9483     }
9484     *pref = prefix;
9485     *URI = uri;
9486 
9487     /*
9488      * SAX callback
9489      */
9490     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9491 	(!ctxt->disableSAX)) {
9492 	if (nbNs > 0)
9493 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9494                           nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9495 			  nbatts / 5, nbdef, atts);
9496 	else
9497 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9498                           0, NULL, nbatts / 5, nbdef, atts);
9499     }
9500 
9501 done:
9502     /*
9503      * Free allocated attribute values
9504      */
9505     if (attval != 0) {
9506 	for (i = 0, j = 0; j < nratts; i += 5, j++)
9507 	    if (ctxt->attallocs[j] & 0x80000000)
9508 	        xmlFree((xmlChar *) atts[i+3]);
9509     }
9510 
9511     *nbNsPtr = nbNs;
9512     return(localname);
9513 }
9514 
9515 /**
9516  * xmlParseEndTag2:
9517  * @ctxt:  an XML parser context
9518  * @line:  line of the start tag
9519  * @nsNr:  number of namespaces on the start tag
9520  *
9521  * Parse an end tag. Always consumes '</'.
9522  *
9523  * [42] ETag ::= '</' Name S? '>'
9524  *
9525  * With namespace
9526  *
9527  * [NS 9] ETag ::= '</' QName S? '>'
9528  */
9529 
9530 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9531 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9532     const xmlChar *name;
9533 
9534     GROW;
9535     if ((RAW != '<') || (NXT(1) != '/')) {
9536 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9537 	return;
9538     }
9539     SKIP(2);
9540 
9541     if (tag->prefix == NULL)
9542         name = xmlParseNameAndCompare(ctxt, ctxt->name);
9543     else
9544         name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9545 
9546     /*
9547      * We should definitely be at the ending "S? '>'" part
9548      */
9549     GROW;
9550     SKIP_BLANKS;
9551     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9552 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9553     } else
9554 	NEXT1;
9555 
9556     /*
9557      * [ WFC: Element Type Match ]
9558      * The Name in an element's end-tag must match the element type in the
9559      * start-tag.
9560      *
9561      */
9562     if (name != (xmlChar*)1) {
9563         if (name == NULL) name = BAD_CAST "unparsable";
9564         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9565 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9566 		                ctxt->name, tag->line, name);
9567     }
9568 
9569     /*
9570      * SAX: End of Tag
9571      */
9572     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9573 	(!ctxt->disableSAX))
9574 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9575                                 tag->URI);
9576 
9577     spacePop(ctxt);
9578     if (tag->nsNr != 0)
9579 	xmlParserNsPop(ctxt, tag->nsNr);
9580 }
9581 
9582 /**
9583  * xmlParseCDSect:
9584  * @ctxt:  an XML parser context
9585  *
9586  * DEPRECATED: Internal function, don't use.
9587  *
9588  * Parse escaped pure raw content. Always consumes '<!['.
9589  *
9590  * [18] CDSect ::= CDStart CData CDEnd
9591  *
9592  * [19] CDStart ::= '<![CDATA['
9593  *
9594  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9595  *
9596  * [21] CDEnd ::= ']]>'
9597  */
9598 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9599 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9600     xmlChar *buf = NULL;
9601     int len = 0;
9602     int size = XML_PARSER_BUFFER_SIZE;
9603     int r, rl;
9604     int	s, sl;
9605     int cur, l;
9606     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9607                     XML_MAX_HUGE_LENGTH :
9608                     XML_MAX_TEXT_LENGTH;
9609 
9610     if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9611         return;
9612     SKIP(3);
9613 
9614     if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9615         return;
9616     SKIP(6);
9617 
9618     r = CUR_CHAR(rl);
9619     if (!IS_CHAR(r)) {
9620 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9621         goto out;
9622     }
9623     NEXTL(rl);
9624     s = CUR_CHAR(sl);
9625     if (!IS_CHAR(s)) {
9626 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9627         goto out;
9628     }
9629     NEXTL(sl);
9630     cur = CUR_CHAR(l);
9631     buf = (xmlChar *) xmlMallocAtomic(size);
9632     if (buf == NULL) {
9633 	xmlErrMemory(ctxt);
9634         goto out;
9635     }
9636     while (IS_CHAR(cur) &&
9637            ((r != ']') || (s != ']') || (cur != '>'))) {
9638 	if (len + 5 >= size) {
9639 	    xmlChar *tmp;
9640 
9641 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9642 	    if (tmp == NULL) {
9643 		xmlErrMemory(ctxt);
9644                 goto out;
9645 	    }
9646 	    buf = tmp;
9647 	    size *= 2;
9648 	}
9649 	COPY_BUF(buf, len, r);
9650         if (len > maxLength) {
9651             xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9652                            "CData section too big found\n");
9653             goto out;
9654         }
9655 	r = s;
9656 	rl = sl;
9657 	s = cur;
9658 	sl = l;
9659 	NEXTL(l);
9660 	cur = CUR_CHAR(l);
9661     }
9662     buf[len] = 0;
9663     if (cur != '>') {
9664 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9665 	                     "CData section not finished\n%.50s\n", buf);
9666         goto out;
9667     }
9668     NEXTL(l);
9669 
9670     /*
9671      * OK the buffer is to be consumed as cdata.
9672      */
9673     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9674 	if (ctxt->sax->cdataBlock != NULL)
9675 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9676 	else if (ctxt->sax->characters != NULL)
9677 	    ctxt->sax->characters(ctxt->userData, buf, len);
9678     }
9679 
9680 out:
9681     xmlFree(buf);
9682 }
9683 
9684 /**
9685  * xmlParseContentInternal:
9686  * @ctxt:  an XML parser context
9687  *
9688  * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9689  * unexpected EOF to the caller.
9690  */
9691 
9692 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9693 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9694     int oldNameNr = ctxt->nameNr;
9695     int oldSpaceNr = ctxt->spaceNr;
9696     int oldNodeNr = ctxt->nodeNr;
9697 
9698     GROW;
9699     while ((ctxt->input->cur < ctxt->input->end) &&
9700 	   (PARSER_STOPPED(ctxt) == 0)) {
9701 	const xmlChar *cur = ctxt->input->cur;
9702 
9703 	/*
9704 	 * First case : a Processing Instruction.
9705 	 */
9706 	if ((*cur == '<') && (cur[1] == '?')) {
9707 	    xmlParsePI(ctxt);
9708 	}
9709 
9710 	/*
9711 	 * Second case : a CDSection
9712 	 */
9713 	/* 2.6.0 test was *cur not RAW */
9714 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9715 	    xmlParseCDSect(ctxt);
9716 	}
9717 
9718 	/*
9719 	 * Third case :  a comment
9720 	 */
9721 	else if ((*cur == '<') && (NXT(1) == '!') &&
9722 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9723 	    xmlParseComment(ctxt);
9724 	}
9725 
9726 	/*
9727 	 * Fourth case :  a sub-element.
9728 	 */
9729 	else if (*cur == '<') {
9730             if (NXT(1) == '/') {
9731                 if (ctxt->nameNr <= oldNameNr)
9732                     break;
9733 	        xmlParseElementEnd(ctxt);
9734             } else {
9735 	        xmlParseElementStart(ctxt);
9736             }
9737 	}
9738 
9739 	/*
9740 	 * Fifth case : a reference. If if has not been resolved,
9741 	 *    parsing returns it's Name, create the node
9742 	 */
9743 
9744 	else if (*cur == '&') {
9745 	    xmlParseReference(ctxt);
9746 	}
9747 
9748 	/*
9749 	 * Last case, text. Note that References are handled directly.
9750 	 */
9751 	else {
9752 	    xmlParseCharDataInternal(ctxt, 0);
9753 	}
9754 
9755 	SHRINK;
9756 	GROW;
9757     }
9758 
9759     if ((ctxt->nameNr > oldNameNr) &&
9760         (ctxt->input->cur >= ctxt->input->end) &&
9761         (ctxt->wellFormed)) {
9762         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9763         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9764         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9765                 "Premature end of data in tag %s line %d\n",
9766                 name, line, NULL);
9767     }
9768 
9769     /*
9770      * Clean up in error case
9771      */
9772 
9773     while (ctxt->nodeNr > oldNodeNr)
9774         nodePop(ctxt);
9775 
9776     while (ctxt->nameNr > oldNameNr) {
9777         xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9778 
9779         if (tag->nsNr != 0)
9780             xmlParserNsPop(ctxt, tag->nsNr);
9781 
9782         namePop(ctxt);
9783     }
9784 
9785     while (ctxt->spaceNr > oldSpaceNr)
9786         spacePop(ctxt);
9787 }
9788 
9789 /**
9790  * xmlParseContent:
9791  * @ctxt:  an XML parser context
9792  *
9793  * Parse XML element content. This is useful if you're only interested
9794  * in custom SAX callbacks. If you want a node list, use
9795  * xmlParseInNodeContext.
9796  */
9797 void
xmlParseContent(xmlParserCtxtPtr ctxt)9798 xmlParseContent(xmlParserCtxtPtr ctxt) {
9799     if ((ctxt == NULL) || (ctxt->input == NULL))
9800         return;
9801 
9802     xmlCtxtInitializeLate(ctxt);
9803 
9804     xmlParseContentInternal(ctxt);
9805 
9806     if (ctxt->input->cur < ctxt->input->end)
9807 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9808 }
9809 
9810 /**
9811  * xmlParseElement:
9812  * @ctxt:  an XML parser context
9813  *
9814  * DEPRECATED: Internal function, don't use.
9815  *
9816  * parse an XML element
9817  *
9818  * [39] element ::= EmptyElemTag | STag content ETag
9819  *
9820  * [ WFC: Element Type Match ]
9821  * The Name in an element's end-tag must match the element type in the
9822  * start-tag.
9823  *
9824  */
9825 
9826 void
xmlParseElement(xmlParserCtxtPtr ctxt)9827 xmlParseElement(xmlParserCtxtPtr ctxt) {
9828     if (xmlParseElementStart(ctxt) != 0)
9829         return;
9830 
9831     xmlParseContentInternal(ctxt);
9832 
9833     if (ctxt->input->cur >= ctxt->input->end) {
9834         if (ctxt->wellFormed) {
9835             const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9836             int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9837             xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9838                     "Premature end of data in tag %s line %d\n",
9839                     name, line, NULL);
9840         }
9841         return;
9842     }
9843 
9844     xmlParseElementEnd(ctxt);
9845 }
9846 
9847 /**
9848  * xmlParseElementStart:
9849  * @ctxt:  an XML parser context
9850  *
9851  * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9852  * opening tag was parsed, 1 if an empty element was parsed.
9853  *
9854  * Always consumes '<'.
9855  */
9856 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)9857 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9858     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9859     const xmlChar *name;
9860     const xmlChar *prefix = NULL;
9861     const xmlChar *URI = NULL;
9862     xmlParserNodeInfo node_info;
9863     int line;
9864     xmlNodePtr cur;
9865     int nbNs = 0;
9866 
9867     if (ctxt->nameNr > maxDepth) {
9868         xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9869                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9870                 ctxt->nameNr);
9871 	xmlHaltParser(ctxt);
9872 	return(-1);
9873     }
9874 
9875     /* Capture start position */
9876     if (ctxt->record_info) {
9877         node_info.begin_pos = ctxt->input->consumed +
9878                           (CUR_PTR - ctxt->input->base);
9879 	node_info.begin_line = ctxt->input->line;
9880     }
9881 
9882     if (ctxt->spaceNr == 0)
9883 	spacePush(ctxt, -1);
9884     else if (*ctxt->space == -2)
9885 	spacePush(ctxt, -1);
9886     else
9887 	spacePush(ctxt, *ctxt->space);
9888 
9889     line = ctxt->input->line;
9890 #ifdef LIBXML_SAX1_ENABLED
9891     if (ctxt->sax2)
9892 #endif /* LIBXML_SAX1_ENABLED */
9893         name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9894 #ifdef LIBXML_SAX1_ENABLED
9895     else
9896 	name = xmlParseStartTag(ctxt);
9897 #endif /* LIBXML_SAX1_ENABLED */
9898     if (name == NULL) {
9899 	spacePop(ctxt);
9900         return(-1);
9901     }
9902     nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9903     cur = ctxt->node;
9904 
9905 #ifdef LIBXML_VALID_ENABLED
9906     /*
9907      * [ VC: Root Element Type ]
9908      * The Name in the document type declaration must match the element
9909      * type of the root element.
9910      */
9911     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9912         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9913         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9914 #endif /* LIBXML_VALID_ENABLED */
9915 
9916     /*
9917      * Check for an Empty Element.
9918      */
9919     if ((RAW == '/') && (NXT(1) == '>')) {
9920         SKIP(2);
9921 	if (ctxt->sax2) {
9922 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9923 		(!ctxt->disableSAX))
9924 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9925 #ifdef LIBXML_SAX1_ENABLED
9926 	} else {
9927 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9928 		(!ctxt->disableSAX))
9929 		ctxt->sax->endElement(ctxt->userData, name);
9930 #endif /* LIBXML_SAX1_ENABLED */
9931 	}
9932 	namePop(ctxt);
9933 	spacePop(ctxt);
9934 	if (nbNs > 0)
9935 	    xmlParserNsPop(ctxt, nbNs);
9936 	if (cur != NULL && ctxt->record_info) {
9937             node_info.node = cur;
9938             node_info.end_pos = ctxt->input->consumed +
9939                                 (CUR_PTR - ctxt->input->base);
9940             node_info.end_line = ctxt->input->line;
9941             xmlParserAddNodeInfo(ctxt, &node_info);
9942 	}
9943 	return(1);
9944     }
9945     if (RAW == '>') {
9946         NEXT1;
9947         if (cur != NULL && ctxt->record_info) {
9948             node_info.node = cur;
9949             node_info.end_pos = 0;
9950             node_info.end_line = 0;
9951             xmlParserAddNodeInfo(ctxt, &node_info);
9952         }
9953     } else {
9954         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9955 		     "Couldn't find end of Start Tag %s line %d\n",
9956 		                name, line, NULL);
9957 
9958 	/*
9959 	 * end of parsing of this node.
9960 	 */
9961 	nodePop(ctxt);
9962 	namePop(ctxt);
9963 	spacePop(ctxt);
9964 	if (nbNs > 0)
9965 	    xmlParserNsPop(ctxt, nbNs);
9966 	return(-1);
9967     }
9968 
9969     return(0);
9970 }
9971 
9972 /**
9973  * xmlParseElementEnd:
9974  * @ctxt:  an XML parser context
9975  *
9976  * Parse the end of an XML element. Always consumes '</'.
9977  */
9978 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)9979 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9980     xmlNodePtr cur = ctxt->node;
9981 
9982     if (ctxt->nameNr <= 0) {
9983         if ((RAW == '<') && (NXT(1) == '/'))
9984             SKIP(2);
9985         return;
9986     }
9987 
9988     /*
9989      * parse the end of tag: '</' should be here.
9990      */
9991     if (ctxt->sax2) {
9992 	xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9993 	namePop(ctxt);
9994     }
9995 #ifdef LIBXML_SAX1_ENABLED
9996     else
9997 	xmlParseEndTag1(ctxt, 0);
9998 #endif /* LIBXML_SAX1_ENABLED */
9999 
10000     /*
10001      * Capture end position
10002      */
10003     if (cur != NULL && ctxt->record_info) {
10004         xmlParserNodeInfoPtr node_info;
10005 
10006         node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10007         if (node_info != NULL) {
10008             node_info->end_pos = ctxt->input->consumed +
10009                                  (CUR_PTR - ctxt->input->base);
10010             node_info->end_line = ctxt->input->line;
10011         }
10012     }
10013 }
10014 
10015 /**
10016  * xmlParseVersionNum:
10017  * @ctxt:  an XML parser context
10018  *
10019  * DEPRECATED: Internal function, don't use.
10020  *
10021  * parse the XML version value.
10022  *
10023  * [26] VersionNum ::= '1.' [0-9]+
10024  *
10025  * In practice allow [0-9].[0-9]+ at that level
10026  *
10027  * Returns the string giving the XML version number, or NULL
10028  */
10029 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10030 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10031     xmlChar *buf = NULL;
10032     int len = 0;
10033     int size = 10;
10034     xmlChar cur;
10035 
10036     buf = (xmlChar *) xmlMallocAtomic(size);
10037     if (buf == NULL) {
10038 	xmlErrMemory(ctxt);
10039 	return(NULL);
10040     }
10041     cur = CUR;
10042     if (!((cur >= '0') && (cur <= '9'))) {
10043 	xmlFree(buf);
10044 	return(NULL);
10045     }
10046     buf[len++] = cur;
10047     NEXT;
10048     cur=CUR;
10049     if (cur != '.') {
10050 	xmlFree(buf);
10051 	return(NULL);
10052     }
10053     buf[len++] = cur;
10054     NEXT;
10055     cur=CUR;
10056     while ((cur >= '0') && (cur <= '9')) {
10057 	if (len + 1 >= size) {
10058 	    xmlChar *tmp;
10059 
10060 	    size *= 2;
10061 	    tmp = (xmlChar *) xmlRealloc(buf, size);
10062 	    if (tmp == NULL) {
10063 	        xmlFree(buf);
10064 		xmlErrMemory(ctxt);
10065 		return(NULL);
10066 	    }
10067 	    buf = tmp;
10068 	}
10069 	buf[len++] = cur;
10070 	NEXT;
10071 	cur=CUR;
10072     }
10073     buf[len] = 0;
10074     return(buf);
10075 }
10076 
10077 /**
10078  * xmlParseVersionInfo:
10079  * @ctxt:  an XML parser context
10080  *
10081  * DEPRECATED: Internal function, don't use.
10082  *
10083  * parse the XML version.
10084  *
10085  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10086  *
10087  * [25] Eq ::= S? '=' S?
10088  *
10089  * Returns the version string, e.g. "1.0"
10090  */
10091 
10092 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10093 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10094     xmlChar *version = NULL;
10095 
10096     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10097 	SKIP(7);
10098 	SKIP_BLANKS;
10099 	if (RAW != '=') {
10100 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10101 	    return(NULL);
10102         }
10103 	NEXT;
10104 	SKIP_BLANKS;
10105 	if (RAW == '"') {
10106 	    NEXT;
10107 	    version = xmlParseVersionNum(ctxt);
10108 	    if (RAW != '"') {
10109 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110 	    } else
10111 	        NEXT;
10112 	} else if (RAW == '\''){
10113 	    NEXT;
10114 	    version = xmlParseVersionNum(ctxt);
10115 	    if (RAW != '\'') {
10116 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10117 	    } else
10118 	        NEXT;
10119 	} else {
10120 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10121 	}
10122     }
10123     return(version);
10124 }
10125 
10126 /**
10127  * xmlParseEncName:
10128  * @ctxt:  an XML parser context
10129  *
10130  * DEPRECATED: Internal function, don't use.
10131  *
10132  * parse the XML encoding name
10133  *
10134  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10135  *
10136  * Returns the encoding name value or NULL
10137  */
10138 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10139 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10140     xmlChar *buf = NULL;
10141     int len = 0;
10142     int size = 10;
10143     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10144                     XML_MAX_TEXT_LENGTH :
10145                     XML_MAX_NAME_LENGTH;
10146     xmlChar cur;
10147 
10148     cur = CUR;
10149     if (((cur >= 'a') && (cur <= 'z')) ||
10150         ((cur >= 'A') && (cur <= 'Z'))) {
10151 	buf = (xmlChar *) xmlMallocAtomic(size);
10152 	if (buf == NULL) {
10153 	    xmlErrMemory(ctxt);
10154 	    return(NULL);
10155 	}
10156 
10157 	buf[len++] = cur;
10158 	NEXT;
10159 	cur = CUR;
10160 	while (((cur >= 'a') && (cur <= 'z')) ||
10161 	       ((cur >= 'A') && (cur <= 'Z')) ||
10162 	       ((cur >= '0') && (cur <= '9')) ||
10163 	       (cur == '.') || (cur == '_') ||
10164 	       (cur == '-')) {
10165 	    if (len + 1 >= size) {
10166 	        xmlChar *tmp;
10167 
10168 		size *= 2;
10169 		tmp = (xmlChar *) xmlRealloc(buf, size);
10170 		if (tmp == NULL) {
10171 		    xmlErrMemory(ctxt);
10172 		    xmlFree(buf);
10173 		    return(NULL);
10174 		}
10175 		buf = tmp;
10176 	    }
10177 	    buf[len++] = cur;
10178             if (len > maxLength) {
10179                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10180                 xmlFree(buf);
10181                 return(NULL);
10182             }
10183 	    NEXT;
10184 	    cur = CUR;
10185         }
10186 	buf[len] = 0;
10187     } else {
10188 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10189     }
10190     return(buf);
10191 }
10192 
10193 /**
10194  * xmlParseEncodingDecl:
10195  * @ctxt:  an XML parser context
10196  *
10197  * DEPRECATED: Internal function, don't use.
10198  *
10199  * parse the XML encoding declaration
10200  *
10201  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10202  *
10203  * this setups the conversion filters.
10204  *
10205  * Returns the encoding value or NULL
10206  */
10207 
10208 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10209 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10210     xmlChar *encoding = NULL;
10211 
10212     SKIP_BLANKS;
10213     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10214         return(NULL);
10215 
10216     SKIP(8);
10217     SKIP_BLANKS;
10218     if (RAW != '=') {
10219         xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10220         return(NULL);
10221     }
10222     NEXT;
10223     SKIP_BLANKS;
10224     if (RAW == '"') {
10225         NEXT;
10226         encoding = xmlParseEncName(ctxt);
10227         if (RAW != '"') {
10228             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10229             xmlFree((xmlChar *) encoding);
10230             return(NULL);
10231         } else
10232             NEXT;
10233     } else if (RAW == '\''){
10234         NEXT;
10235         encoding = xmlParseEncName(ctxt);
10236         if (RAW != '\'') {
10237             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10238             xmlFree((xmlChar *) encoding);
10239             return(NULL);
10240         } else
10241             NEXT;
10242     } else {
10243         xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10244     }
10245 
10246     if (encoding == NULL)
10247         return(NULL);
10248 
10249     xmlSetDeclaredEncoding(ctxt, encoding);
10250 
10251     return(ctxt->encoding);
10252 }
10253 
10254 /**
10255  * xmlParseSDDecl:
10256  * @ctxt:  an XML parser context
10257  *
10258  * DEPRECATED: Internal function, don't use.
10259  *
10260  * parse the XML standalone declaration
10261  *
10262  * [32] SDDecl ::= S 'standalone' Eq
10263  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10264  *
10265  * [ VC: Standalone Document Declaration ]
10266  * TODO The standalone document declaration must have the value "no"
10267  * if any external markup declarations contain declarations of:
10268  *  - attributes with default values, if elements to which these
10269  *    attributes apply appear in the document without specifications
10270  *    of values for these attributes, or
10271  *  - entities (other than amp, lt, gt, apos, quot), if references
10272  *    to those entities appear in the document, or
10273  *  - attributes with values subject to normalization, where the
10274  *    attribute appears in the document with a value which will change
10275  *    as a result of normalization, or
10276  *  - element types with element content, if white space occurs directly
10277  *    within any instance of those types.
10278  *
10279  * Returns:
10280  *   1 if standalone="yes"
10281  *   0 if standalone="no"
10282  *  -2 if standalone attribute is missing or invalid
10283  *	  (A standalone value of -2 means that the XML declaration was found,
10284  *	   but no value was specified for the standalone attribute).
10285  */
10286 
10287 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10288 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10289     int standalone = -2;
10290 
10291     SKIP_BLANKS;
10292     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10293 	SKIP(10);
10294         SKIP_BLANKS;
10295 	if (RAW != '=') {
10296 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10297 	    return(standalone);
10298         }
10299 	NEXT;
10300 	SKIP_BLANKS;
10301         if (RAW == '\''){
10302 	    NEXT;
10303 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10304 	        standalone = 0;
10305                 SKIP(2);
10306 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10307 	               (NXT(2) == 's')) {
10308 	        standalone = 1;
10309 		SKIP(3);
10310             } else {
10311 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10312 	    }
10313 	    if (RAW != '\'') {
10314 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10315 	    } else
10316 	        NEXT;
10317 	} else if (RAW == '"'){
10318 	    NEXT;
10319 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10320 	        standalone = 0;
10321 		SKIP(2);
10322 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10323 	               (NXT(2) == 's')) {
10324 	        standalone = 1;
10325                 SKIP(3);
10326             } else {
10327 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10328 	    }
10329 	    if (RAW != '"') {
10330 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10331 	    } else
10332 	        NEXT;
10333 	} else {
10334 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10335         }
10336     }
10337     return(standalone);
10338 }
10339 
10340 /**
10341  * xmlParseXMLDecl:
10342  * @ctxt:  an XML parser context
10343  *
10344  * DEPRECATED: Internal function, don't use.
10345  *
10346  * parse an XML declaration header
10347  *
10348  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10349  */
10350 
10351 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10352 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10353     xmlChar *version;
10354 
10355     /*
10356      * This value for standalone indicates that the document has an
10357      * XML declaration but it does not have a standalone attribute.
10358      * It will be overwritten later if a standalone attribute is found.
10359      */
10360 
10361     ctxt->standalone = -2;
10362 
10363     /*
10364      * We know that '<?xml' is here.
10365      */
10366     SKIP(5);
10367 
10368     if (!IS_BLANK_CH(RAW)) {
10369 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10370 	               "Blank needed after '<?xml'\n");
10371     }
10372     SKIP_BLANKS;
10373 
10374     /*
10375      * We must have the VersionInfo here.
10376      */
10377     version = xmlParseVersionInfo(ctxt);
10378     if (version == NULL) {
10379 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10380     } else {
10381 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10382 	    /*
10383 	     * Changed here for XML-1.0 5th edition
10384 	     */
10385 	    if (ctxt->options & XML_PARSE_OLD10) {
10386 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10387 			          "Unsupported version '%s'\n",
10388 			          version);
10389 	    } else {
10390 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10391 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10392 		                  "Unsupported version '%s'\n",
10393 				  version, NULL);
10394 		} else {
10395 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10396 				      "Unsupported version '%s'\n",
10397 				      version);
10398 		}
10399 	    }
10400 	}
10401 	if (ctxt->version != NULL)
10402 	    xmlFree((void *) ctxt->version);
10403 	ctxt->version = version;
10404     }
10405 
10406     /*
10407      * We may have the encoding declaration
10408      */
10409     if (!IS_BLANK_CH(RAW)) {
10410         if ((RAW == '?') && (NXT(1) == '>')) {
10411 	    SKIP(2);
10412 	    return;
10413 	}
10414 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10415     }
10416     xmlParseEncodingDecl(ctxt);
10417 
10418     /*
10419      * We may have the standalone status.
10420      */
10421     if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10422         if ((RAW == '?') && (NXT(1) == '>')) {
10423 	    SKIP(2);
10424 	    return;
10425 	}
10426 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10427     }
10428 
10429     /*
10430      * We can grow the input buffer freely at that point
10431      */
10432     GROW;
10433 
10434     SKIP_BLANKS;
10435     ctxt->standalone = xmlParseSDDecl(ctxt);
10436 
10437     SKIP_BLANKS;
10438     if ((RAW == '?') && (NXT(1) == '>')) {
10439         SKIP(2);
10440     } else if (RAW == '>') {
10441         /* Deprecated old WD ... */
10442 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10443 	NEXT;
10444     } else {
10445         int c;
10446 
10447 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10448         while ((PARSER_STOPPED(ctxt) == 0) &&
10449                ((c = CUR) != 0)) {
10450             NEXT;
10451             if (c == '>')
10452                 break;
10453         }
10454     }
10455 }
10456 
10457 /**
10458  * xmlParseMisc:
10459  * @ctxt:  an XML parser context
10460  *
10461  * DEPRECATED: Internal function, don't use.
10462  *
10463  * parse an XML Misc* optional field.
10464  *
10465  * [27] Misc ::= Comment | PI |  S
10466  */
10467 
10468 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10469 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10470     while (PARSER_STOPPED(ctxt) == 0) {
10471         SKIP_BLANKS;
10472         GROW;
10473         if ((RAW == '<') && (NXT(1) == '?')) {
10474 	    xmlParsePI(ctxt);
10475         } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10476 	    xmlParseComment(ctxt);
10477         } else {
10478             break;
10479         }
10480     }
10481 }
10482 
10483 static void
xmlFinishDocument(xmlParserCtxtPtr ctxt)10484 xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10485     xmlDocPtr doc;
10486 
10487     /*
10488      * SAX: end of the document processing.
10489      */
10490     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10491         ctxt->sax->endDocument(ctxt->userData);
10492 
10493     doc = ctxt->myDoc;
10494     if (doc != NULL) {
10495         if (ctxt->wellFormed) {
10496             doc->properties |= XML_DOC_WELLFORMED;
10497             if (ctxt->valid)
10498                 doc->properties |= XML_DOC_DTDVALID;
10499             if (ctxt->nsWellFormed)
10500                 doc->properties |= XML_DOC_NSVALID;
10501         }
10502 
10503         if (ctxt->options & XML_PARSE_OLD10)
10504             doc->properties |= XML_DOC_OLD10;
10505 
10506         /*
10507          * Remove locally kept entity definitions if the tree was not built
10508          */
10509 	if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10510             xmlFreeDoc(doc);
10511             ctxt->myDoc = NULL;
10512         }
10513     }
10514 }
10515 
10516 /**
10517  * xmlParseDocument:
10518  * @ctxt:  an XML parser context
10519  *
10520  * Parse an XML document and invoke the SAX handlers. This is useful
10521  * if you're only interested in custom SAX callbacks. If you want a
10522  * document tree, use xmlCtxtParseDocument.
10523  *
10524  * Returns 0, -1 in case of error.
10525  */
10526 
10527 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10528 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10529     if ((ctxt == NULL) || (ctxt->input == NULL))
10530         return(-1);
10531 
10532     GROW;
10533 
10534     /*
10535      * SAX: detecting the level.
10536      */
10537     xmlCtxtInitializeLate(ctxt);
10538 
10539     /*
10540      * Document locator is unused. Only for backward compatibility.
10541      */
10542     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10543         xmlSAXLocator copy = xmlDefaultSAXLocator;
10544         ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10545     }
10546 
10547     xmlDetectEncoding(ctxt);
10548 
10549     if (CUR == 0) {
10550 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10551 	return(-1);
10552     }
10553 
10554     GROW;
10555     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10556 
10557 	/*
10558 	 * Note that we will switch encoding on the fly.
10559 	 */
10560 	xmlParseXMLDecl(ctxt);
10561 	SKIP_BLANKS;
10562     } else {
10563 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10564         if (ctxt->version == NULL) {
10565             xmlErrMemory(ctxt);
10566             return(-1);
10567         }
10568     }
10569     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10570         ctxt->sax->startDocument(ctxt->userData);
10571     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10572         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10573 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10574     }
10575 
10576     /*
10577      * The Misc part of the Prolog
10578      */
10579     xmlParseMisc(ctxt);
10580 
10581     /*
10582      * Then possibly doc type declaration(s) and more Misc
10583      * (doctypedecl Misc*)?
10584      */
10585     GROW;
10586     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10587 
10588 	ctxt->inSubset = 1;
10589 	xmlParseDocTypeDecl(ctxt);
10590 	if (RAW == '[') {
10591 	    xmlParseInternalSubset(ctxt);
10592 	}
10593 
10594 	/*
10595 	 * Create and update the external subset.
10596 	 */
10597 	ctxt->inSubset = 2;
10598 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10599 	    (!ctxt->disableSAX))
10600 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10601 	                              ctxt->extSubSystem, ctxt->extSubURI);
10602 	ctxt->inSubset = 0;
10603 
10604         xmlCleanSpecialAttr(ctxt);
10605 
10606 	xmlParseMisc(ctxt);
10607     }
10608 
10609     /*
10610      * Time to start parsing the tree itself
10611      */
10612     GROW;
10613     if (RAW != '<') {
10614         if (ctxt->wellFormed)
10615             xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10616                            "Start tag expected, '<' not found\n");
10617     } else {
10618 	xmlParseElement(ctxt);
10619 
10620 	/*
10621 	 * The Misc part at the end
10622 	 */
10623 	xmlParseMisc(ctxt);
10624 
10625         if (ctxt->input->cur < ctxt->input->end) {
10626             if (ctxt->wellFormed)
10627 	        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10628         } else if ((ctxt->input->buf != NULL) &&
10629                    (ctxt->input->buf->encoder != NULL) &&
10630                    (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10631             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10632                            "Truncated multi-byte sequence at EOF\n");
10633         }
10634     }
10635 
10636     ctxt->instate = XML_PARSER_EOF;
10637     xmlFinishDocument(ctxt);
10638 
10639     if (! ctxt->wellFormed) {
10640 	ctxt->valid = 0;
10641 	return(-1);
10642     }
10643 
10644     return(0);
10645 }
10646 
10647 /**
10648  * xmlParseExtParsedEnt:
10649  * @ctxt:  an XML parser context
10650  *
10651  * parse a general parsed entity
10652  * An external general parsed entity is well-formed if it matches the
10653  * production labeled extParsedEnt.
10654  *
10655  * [78] extParsedEnt ::= TextDecl? content
10656  *
10657  * Returns 0, -1 in case of error. the parser context is augmented
10658  *                as a result of the parsing.
10659  */
10660 
10661 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10662 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10663     if ((ctxt == NULL) || (ctxt->input == NULL))
10664         return(-1);
10665 
10666     xmlCtxtInitializeLate(ctxt);
10667 
10668     /*
10669      * Document locator is unused. Only for backward compatibility.
10670      */
10671     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10672         xmlSAXLocator copy = xmlDefaultSAXLocator;
10673         ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10674     }
10675 
10676     xmlDetectEncoding(ctxt);
10677 
10678     if (CUR == 0) {
10679 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10680     }
10681 
10682     /*
10683      * Check for the XMLDecl in the Prolog.
10684      */
10685     GROW;
10686     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10687 
10688 	/*
10689 	 * Note that we will switch encoding on the fly.
10690 	 */
10691 	xmlParseXMLDecl(ctxt);
10692 	SKIP_BLANKS;
10693     } else {
10694 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10695     }
10696     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10697         ctxt->sax->startDocument(ctxt->userData);
10698 
10699     /*
10700      * Doing validity checking on chunk doesn't make sense
10701      */
10702     ctxt->options &= ~XML_PARSE_DTDVALID;
10703     ctxt->validate = 0;
10704     ctxt->depth = 0;
10705 
10706     xmlParseContentInternal(ctxt);
10707 
10708     if (ctxt->input->cur < ctxt->input->end)
10709 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10710 
10711     /*
10712      * SAX: end of the document processing.
10713      */
10714     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10715         ctxt->sax->endDocument(ctxt->userData);
10716 
10717     if (! ctxt->wellFormed) return(-1);
10718     return(0);
10719 }
10720 
10721 #ifdef LIBXML_PUSH_ENABLED
10722 /************************************************************************
10723  *									*
10724  *		Progressive parsing interfaces				*
10725  *									*
10726  ************************************************************************/
10727 
10728 /**
10729  * xmlParseLookupChar:
10730  * @ctxt:  an XML parser context
10731  * @c:  character
10732  *
10733  * Check whether the input buffer contains a character.
10734  */
10735 static int
xmlParseLookupChar(xmlParserCtxtPtr ctxt,int c)10736 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10737     const xmlChar *cur;
10738 
10739     if (ctxt->checkIndex == 0) {
10740         cur = ctxt->input->cur + 1;
10741     } else {
10742         cur = ctxt->input->cur + ctxt->checkIndex;
10743     }
10744 
10745     if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10746         size_t index = ctxt->input->end - ctxt->input->cur;
10747 
10748         if (index > LONG_MAX) {
10749             ctxt->checkIndex = 0;
10750             return(1);
10751         }
10752         ctxt->checkIndex = index;
10753         return(0);
10754     } else {
10755         ctxt->checkIndex = 0;
10756         return(1);
10757     }
10758 }
10759 
10760 /**
10761  * xmlParseLookupString:
10762  * @ctxt:  an XML parser context
10763  * @startDelta: delta to apply at the start
10764  * @str:  string
10765  * @strLen:  length of string
10766  *
10767  * Check whether the input buffer contains a string.
10768  */
10769 static const xmlChar *
xmlParseLookupString(xmlParserCtxtPtr ctxt,size_t startDelta,const char * str,size_t strLen)10770 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10771                      const char *str, size_t strLen) {
10772     const xmlChar *cur, *term;
10773 
10774     if (ctxt->checkIndex == 0) {
10775         cur = ctxt->input->cur + startDelta;
10776     } else {
10777         cur = ctxt->input->cur + ctxt->checkIndex;
10778     }
10779 
10780     term = BAD_CAST strstr((const char *) cur, str);
10781     if (term == NULL) {
10782         const xmlChar *end = ctxt->input->end;
10783         size_t index;
10784 
10785         /* Rescan (strLen - 1) characters. */
10786         if ((size_t) (end - cur) < strLen)
10787             end = cur;
10788         else
10789             end -= strLen - 1;
10790         index = end - ctxt->input->cur;
10791         if (index > LONG_MAX) {
10792             ctxt->checkIndex = 0;
10793             return(ctxt->input->end - strLen);
10794         }
10795         ctxt->checkIndex = index;
10796     } else {
10797         ctxt->checkIndex = 0;
10798     }
10799 
10800     return(term);
10801 }
10802 
10803 /**
10804  * xmlParseLookupCharData:
10805  * @ctxt:  an XML parser context
10806  *
10807  * Check whether the input buffer contains terminated char data.
10808  */
10809 static int
xmlParseLookupCharData(xmlParserCtxtPtr ctxt)10810 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10811     const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10812     const xmlChar *end = ctxt->input->end;
10813     size_t index;
10814 
10815     while (cur < end) {
10816         if ((*cur == '<') || (*cur == '&')) {
10817             ctxt->checkIndex = 0;
10818             return(1);
10819         }
10820         cur++;
10821     }
10822 
10823     index = cur - ctxt->input->cur;
10824     if (index > LONG_MAX) {
10825         ctxt->checkIndex = 0;
10826         return(1);
10827     }
10828     ctxt->checkIndex = index;
10829     return(0);
10830 }
10831 
10832 /**
10833  * xmlParseLookupGt:
10834  * @ctxt:  an XML parser context
10835  *
10836  * Check whether there's enough data in the input buffer to finish parsing
10837  * a start tag. This has to take quotes into account.
10838  */
10839 static int
xmlParseLookupGt(xmlParserCtxtPtr ctxt)10840 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10841     const xmlChar *cur;
10842     const xmlChar *end = ctxt->input->end;
10843     int state = ctxt->endCheckState;
10844     size_t index;
10845 
10846     if (ctxt->checkIndex == 0)
10847         cur = ctxt->input->cur + 1;
10848     else
10849         cur = ctxt->input->cur + ctxt->checkIndex;
10850 
10851     while (cur < end) {
10852         if (state) {
10853             if (*cur == state)
10854                 state = 0;
10855         } else if (*cur == '\'' || *cur == '"') {
10856             state = *cur;
10857         } else if (*cur == '>') {
10858             ctxt->checkIndex = 0;
10859             ctxt->endCheckState = 0;
10860             return(1);
10861         }
10862         cur++;
10863     }
10864 
10865     index = cur - ctxt->input->cur;
10866     if (index > LONG_MAX) {
10867         ctxt->checkIndex = 0;
10868         ctxt->endCheckState = 0;
10869         return(1);
10870     }
10871     ctxt->checkIndex = index;
10872     ctxt->endCheckState = state;
10873     return(0);
10874 }
10875 
10876 /**
10877  * xmlParseLookupInternalSubset:
10878  * @ctxt:  an XML parser context
10879  *
10880  * Check whether there's enough data in the input buffer to finish parsing
10881  * the internal subset.
10882  */
10883 static int
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt)10884 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10885     /*
10886      * Sorry, but progressive parsing of the internal subset is not
10887      * supported. We first check that the full content of the internal
10888      * subset is available and parsing is launched only at that point.
10889      * Internal subset ends with "']' S? '>'" in an unescaped section and
10890      * not in a ']]>' sequence which are conditional sections.
10891      */
10892     const xmlChar *cur, *start;
10893     const xmlChar *end = ctxt->input->end;
10894     int state = ctxt->endCheckState;
10895     size_t index;
10896 
10897     if (ctxt->checkIndex == 0) {
10898         cur = ctxt->input->cur + 1;
10899     } else {
10900         cur = ctxt->input->cur + ctxt->checkIndex;
10901     }
10902     start = cur;
10903 
10904     while (cur < end) {
10905         if (state == '-') {
10906             if ((*cur == '-') &&
10907                 (cur[1] == '-') &&
10908                 (cur[2] == '>')) {
10909                 state = 0;
10910                 cur += 3;
10911                 start = cur;
10912                 continue;
10913             }
10914         }
10915         else if (state == ']') {
10916             if (*cur == '>') {
10917                 ctxt->checkIndex = 0;
10918                 ctxt->endCheckState = 0;
10919                 return(1);
10920             }
10921             if (IS_BLANK_CH(*cur)) {
10922                 state = ' ';
10923             } else if (*cur != ']') {
10924                 state = 0;
10925                 start = cur;
10926                 continue;
10927             }
10928         }
10929         else if (state == ' ') {
10930             if (*cur == '>') {
10931                 ctxt->checkIndex = 0;
10932                 ctxt->endCheckState = 0;
10933                 return(1);
10934             }
10935             if (!IS_BLANK_CH(*cur)) {
10936                 state = 0;
10937                 start = cur;
10938                 continue;
10939             }
10940         }
10941         else if (state != 0) {
10942             if (*cur == state) {
10943                 state = 0;
10944                 start = cur + 1;
10945             }
10946         }
10947         else if (*cur == '<') {
10948             if ((cur[1] == '!') &&
10949                 (cur[2] == '-') &&
10950                 (cur[3] == '-')) {
10951                 state = '-';
10952                 cur += 4;
10953                 /* Don't treat <!--> as comment */
10954                 start = cur;
10955                 continue;
10956             }
10957         }
10958         else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10959             state = *cur;
10960         }
10961 
10962         cur++;
10963     }
10964 
10965     /*
10966      * Rescan the three last characters to detect "<!--" and "-->"
10967      * split across chunks.
10968      */
10969     if ((state == 0) || (state == '-')) {
10970         if (cur - start < 3)
10971             cur = start;
10972         else
10973             cur -= 3;
10974     }
10975     index = cur - ctxt->input->cur;
10976     if (index > LONG_MAX) {
10977         ctxt->checkIndex = 0;
10978         ctxt->endCheckState = 0;
10979         return(1);
10980     }
10981     ctxt->checkIndex = index;
10982     ctxt->endCheckState = state;
10983     return(0);
10984 }
10985 
10986 /**
10987  * xmlCheckCdataPush:
10988  * @cur: pointer to the block of characters
10989  * @len: length of the block in bytes
10990  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
10991  *
10992  * Check that the block of characters is okay as SCdata content [20]
10993  *
10994  * Returns the number of bytes to pass if okay, a negative index where an
10995  *         UTF-8 error occurred otherwise
10996  */
10997 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)10998 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
10999     int ix;
11000     unsigned char c;
11001     int codepoint;
11002 
11003     if ((utf == NULL) || (len <= 0))
11004         return(0);
11005 
11006     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11007         c = utf[ix];
11008         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11009 	    if (c >= 0x20)
11010 		ix++;
11011 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11012 	        ix++;
11013 	    else
11014 	        return(-ix);
11015 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11016 	    if (ix + 2 > len) return(complete ? -ix : ix);
11017 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11018 	        return(-ix);
11019 	    codepoint = (utf[ix] & 0x1f) << 6;
11020 	    codepoint |= utf[ix+1] & 0x3f;
11021 	    if (!xmlIsCharQ(codepoint))
11022 	        return(-ix);
11023 	    ix += 2;
11024 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11025 	    if (ix + 3 > len) return(complete ? -ix : ix);
11026 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11027 	        ((utf[ix+2] & 0xc0) != 0x80))
11028 		    return(-ix);
11029 	    codepoint = (utf[ix] & 0xf) << 12;
11030 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11031 	    codepoint |= utf[ix+2] & 0x3f;
11032 	    if (!xmlIsCharQ(codepoint))
11033 	        return(-ix);
11034 	    ix += 3;
11035 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11036 	    if (ix + 4 > len) return(complete ? -ix : ix);
11037 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11038 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11039 		((utf[ix+3] & 0xc0) != 0x80))
11040 		    return(-ix);
11041 	    codepoint = (utf[ix] & 0x7) << 18;
11042 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11043 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11044 	    codepoint |= utf[ix+3] & 0x3f;
11045 	    if (!xmlIsCharQ(codepoint))
11046 	        return(-ix);
11047 	    ix += 4;
11048 	} else				/* unknown encoding */
11049 	    return(-ix);
11050       }
11051       return(ix);
11052 }
11053 
11054 /**
11055  * xmlParseTryOrFinish:
11056  * @ctxt:  an XML parser context
11057  * @terminate:  last chunk indicator
11058  *
11059  * Try to progress on parsing
11060  *
11061  * Returns zero if no parsing was possible
11062  */
11063 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11064 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11065     int ret = 0;
11066     size_t avail;
11067     xmlChar cur, next;
11068 
11069     if (ctxt->input == NULL)
11070         return(0);
11071 
11072     if ((ctxt->input != NULL) &&
11073         (ctxt->input->cur - ctxt->input->base > 4096)) {
11074         xmlParserShrink(ctxt);
11075     }
11076 
11077     while (ctxt->disableSAX == 0) {
11078         avail = ctxt->input->end - ctxt->input->cur;
11079         if (avail < 1)
11080 	    goto done;
11081         switch (ctxt->instate) {
11082             case XML_PARSER_EOF:
11083 	        /*
11084 		 * Document parsing is done !
11085 		 */
11086 	        goto done;
11087             case XML_PARSER_START:
11088                 /*
11089                  * Very first chars read from the document flow.
11090                  */
11091                 if ((!terminate) && (avail < 4))
11092                     goto done;
11093 
11094                 /*
11095                  * We need more bytes to detect EBCDIC code pages.
11096                  * See xmlDetectEBCDIC.
11097                  */
11098                 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11099                     (!terminate) && (avail < 200))
11100                     goto done;
11101 
11102                 xmlDetectEncoding(ctxt);
11103                 ctxt->instate = XML_PARSER_XML_DECL;
11104 		break;
11105 
11106             case XML_PARSER_XML_DECL:
11107 		if ((!terminate) && (avail < 2))
11108 		    goto done;
11109 		cur = ctxt->input->cur[0];
11110 		next = ctxt->input->cur[1];
11111 	        if ((cur == '<') && (next == '?')) {
11112 		    /* PI or XML decl */
11113 		    if ((!terminate) &&
11114                         (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11115 			goto done;
11116 		    if ((ctxt->input->cur[2] == 'x') &&
11117 			(ctxt->input->cur[3] == 'm') &&
11118 			(ctxt->input->cur[4] == 'l') &&
11119 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11120 			ret += 5;
11121 			xmlParseXMLDecl(ctxt);
11122 		    } else {
11123 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11124                         if (ctxt->version == NULL) {
11125                             xmlErrMemory(ctxt);
11126                             break;
11127                         }
11128 		    }
11129 		} else {
11130 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11131 		    if (ctxt->version == NULL) {
11132 		        xmlErrMemory(ctxt);
11133 			break;
11134 		    }
11135 		}
11136                 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11137                     xmlSAXLocator copy = xmlDefaultSAXLocator;
11138                     ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
11139                 }
11140                 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11141                     (!ctxt->disableSAX))
11142                     ctxt->sax->startDocument(ctxt->userData);
11143                 ctxt->instate = XML_PARSER_MISC;
11144 		break;
11145             case XML_PARSER_START_TAG: {
11146 	        const xmlChar *name;
11147 		const xmlChar *prefix = NULL;
11148 		const xmlChar *URI = NULL;
11149                 int line = ctxt->input->line;
11150 		int nbNs = 0;
11151 
11152 		if ((!terminate) && (avail < 2))
11153 		    goto done;
11154 		cur = ctxt->input->cur[0];
11155 	        if (cur != '<') {
11156 		    xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11157                                    "Start tag expected, '<' not found");
11158                     ctxt->instate = XML_PARSER_EOF;
11159                     xmlFinishDocument(ctxt);
11160 		    goto done;
11161 		}
11162 		if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11163                     goto done;
11164 		if (ctxt->spaceNr == 0)
11165 		    spacePush(ctxt, -1);
11166 		else if (*ctxt->space == -2)
11167 		    spacePush(ctxt, -1);
11168 		else
11169 		    spacePush(ctxt, *ctxt->space);
11170 #ifdef LIBXML_SAX1_ENABLED
11171 		if (ctxt->sax2)
11172 #endif /* LIBXML_SAX1_ENABLED */
11173 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11174 #ifdef LIBXML_SAX1_ENABLED
11175 		else
11176 		    name = xmlParseStartTag(ctxt);
11177 #endif /* LIBXML_SAX1_ENABLED */
11178 		if (name == NULL) {
11179 		    spacePop(ctxt);
11180                     ctxt->instate = XML_PARSER_EOF;
11181                     xmlFinishDocument(ctxt);
11182 		    goto done;
11183 		}
11184 #ifdef LIBXML_VALID_ENABLED
11185 		/*
11186 		 * [ VC: Root Element Type ]
11187 		 * The Name in the document type declaration must match
11188 		 * the element type of the root element.
11189 		 */
11190 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11191 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11192 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11193 #endif /* LIBXML_VALID_ENABLED */
11194 
11195 		/*
11196 		 * Check for an Empty Element.
11197 		 */
11198 		if ((RAW == '/') && (NXT(1) == '>')) {
11199 		    SKIP(2);
11200 
11201 		    if (ctxt->sax2) {
11202 			if ((ctxt->sax != NULL) &&
11203 			    (ctxt->sax->endElementNs != NULL) &&
11204 			    (!ctxt->disableSAX))
11205 			    ctxt->sax->endElementNs(ctxt->userData, name,
11206 			                            prefix, URI);
11207 			if (nbNs > 0)
11208 			    xmlParserNsPop(ctxt, nbNs);
11209 #ifdef LIBXML_SAX1_ENABLED
11210 		    } else {
11211 			if ((ctxt->sax != NULL) &&
11212 			    (ctxt->sax->endElement != NULL) &&
11213 			    (!ctxt->disableSAX))
11214 			    ctxt->sax->endElement(ctxt->userData, name);
11215 #endif /* LIBXML_SAX1_ENABLED */
11216 		    }
11217 		    spacePop(ctxt);
11218 		} else if (RAW == '>') {
11219 		    NEXT;
11220                     nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11221 		} else {
11222 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11223 					 "Couldn't find end of Start Tag %s\n",
11224 					 name);
11225 		    nodePop(ctxt);
11226 		    spacePop(ctxt);
11227                     if (nbNs > 0)
11228                         xmlParserNsPop(ctxt, nbNs);
11229 		}
11230 
11231                 if (ctxt->nameNr == 0)
11232                     ctxt->instate = XML_PARSER_EPILOG;
11233                 else
11234                     ctxt->instate = XML_PARSER_CONTENT;
11235                 break;
11236 	    }
11237             case XML_PARSER_CONTENT: {
11238 		cur = ctxt->input->cur[0];
11239 
11240 		if (cur == '<') {
11241                     if ((!terminate) && (avail < 2))
11242                         goto done;
11243 		    next = ctxt->input->cur[1];
11244 
11245                     if (next == '/') {
11246                         ctxt->instate = XML_PARSER_END_TAG;
11247                         break;
11248                     } else if (next == '?') {
11249                         if ((!terminate) &&
11250                             (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11251                             goto done;
11252                         xmlParsePI(ctxt);
11253                         ctxt->instate = XML_PARSER_CONTENT;
11254                         break;
11255                     } else if (next == '!') {
11256                         if ((!terminate) && (avail < 3))
11257                             goto done;
11258                         next = ctxt->input->cur[2];
11259 
11260                         if (next == '-') {
11261                             if ((!terminate) && (avail < 4))
11262                                 goto done;
11263                             if (ctxt->input->cur[3] == '-') {
11264                                 if ((!terminate) &&
11265                                     (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11266                                     goto done;
11267                                 xmlParseComment(ctxt);
11268                                 ctxt->instate = XML_PARSER_CONTENT;
11269                                 break;
11270                             }
11271                         } else if (next == '[') {
11272                             if ((!terminate) && (avail < 9))
11273                                 goto done;
11274                             if ((ctxt->input->cur[2] == '[') &&
11275                                 (ctxt->input->cur[3] == 'C') &&
11276                                 (ctxt->input->cur[4] == 'D') &&
11277                                 (ctxt->input->cur[5] == 'A') &&
11278                                 (ctxt->input->cur[6] == 'T') &&
11279                                 (ctxt->input->cur[7] == 'A') &&
11280                                 (ctxt->input->cur[8] == '[')) {
11281                                 SKIP(9);
11282                                 ctxt->instate = XML_PARSER_CDATA_SECTION;
11283                                 break;
11284                             }
11285                         }
11286                     }
11287 		} else if (cur == '&') {
11288 		    if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11289 			goto done;
11290 		    xmlParseReference(ctxt);
11291                     break;
11292 		} else {
11293 		    /* TODO Avoid the extra copy, handle directly !!! */
11294 		    /*
11295 		     * Goal of the following test is:
11296 		     *  - minimize calls to the SAX 'character' callback
11297 		     *    when they are mergeable
11298 		     *  - handle an problem for isBlank when we only parse
11299 		     *    a sequence of blank chars and the next one is
11300 		     *    not available to check against '<' presence.
11301 		     *  - tries to homogenize the differences in SAX
11302 		     *    callbacks between the push and pull versions
11303 		     *    of the parser.
11304 		     */
11305 		    if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11306 			if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11307 			    goto done;
11308                     }
11309                     ctxt->checkIndex = 0;
11310 		    xmlParseCharDataInternal(ctxt, !terminate);
11311                     break;
11312 		}
11313 
11314                 ctxt->instate = XML_PARSER_START_TAG;
11315 		break;
11316 	    }
11317             case XML_PARSER_END_TAG:
11318 		if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11319 		    goto done;
11320 		if (ctxt->sax2) {
11321 	            xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11322 		    nameNsPop(ctxt);
11323 		}
11324 #ifdef LIBXML_SAX1_ENABLED
11325 		  else
11326 		    xmlParseEndTag1(ctxt, 0);
11327 #endif /* LIBXML_SAX1_ENABLED */
11328 		if (ctxt->nameNr == 0) {
11329 		    ctxt->instate = XML_PARSER_EPILOG;
11330 		} else {
11331 		    ctxt->instate = XML_PARSER_CONTENT;
11332 		}
11333 		break;
11334             case XML_PARSER_CDATA_SECTION: {
11335 	        /*
11336 		 * The Push mode need to have the SAX callback for
11337 		 * cdataBlock merge back contiguous callbacks.
11338 		 */
11339 		const xmlChar *term;
11340 
11341                 if (terminate) {
11342                     /*
11343                      * Don't call xmlParseLookupString. If 'terminate'
11344                      * is set, checkIndex is invalid.
11345                      */
11346                     term = BAD_CAST strstr((const char *) ctxt->input->cur,
11347                                            "]]>");
11348                 } else {
11349 		    term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11350                 }
11351 
11352 		if (term == NULL) {
11353 		    int tmp, size;
11354 
11355                     if (terminate) {
11356                         /* Unfinished CDATA section */
11357                         size = ctxt->input->end - ctxt->input->cur;
11358                     } else {
11359                         if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11360                             goto done;
11361                         ctxt->checkIndex = 0;
11362                         /* XXX: Why don't we pass the full buffer? */
11363                         size = XML_PARSER_BIG_BUFFER_SIZE;
11364                     }
11365                     tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11366                     if (tmp <= 0) {
11367                         tmp = -tmp;
11368                         ctxt->input->cur += tmp;
11369                         goto encoding_error;
11370                     }
11371                     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11372                         if (ctxt->sax->cdataBlock != NULL)
11373                             ctxt->sax->cdataBlock(ctxt->userData,
11374                                                   ctxt->input->cur, tmp);
11375                         else if (ctxt->sax->characters != NULL)
11376                             ctxt->sax->characters(ctxt->userData,
11377                                                   ctxt->input->cur, tmp);
11378                     }
11379                     SKIPL(tmp);
11380 		} else {
11381                     int base = term - CUR_PTR;
11382 		    int tmp;
11383 
11384 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11385 		    if ((tmp < 0) || (tmp != base)) {
11386 			tmp = -tmp;
11387 			ctxt->input->cur += tmp;
11388 			goto encoding_error;
11389 		    }
11390 		    if ((ctxt->sax != NULL) && (base == 0) &&
11391 		        (ctxt->sax->cdataBlock != NULL) &&
11392 		        (!ctxt->disableSAX)) {
11393 			/*
11394 			 * Special case to provide identical behaviour
11395 			 * between pull and push parsers on empty CDATA
11396 			 * sections
11397 			 */
11398 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11399 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11400 			               "<![CDATA[", 9)))
11401 			     ctxt->sax->cdataBlock(ctxt->userData,
11402 			                           BAD_CAST "", 0);
11403 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11404 			(!ctxt->disableSAX)) {
11405 			if (ctxt->sax->cdataBlock != NULL)
11406 			    ctxt->sax->cdataBlock(ctxt->userData,
11407 						  ctxt->input->cur, base);
11408 			else if (ctxt->sax->characters != NULL)
11409 			    ctxt->sax->characters(ctxt->userData,
11410 						  ctxt->input->cur, base);
11411 		    }
11412 		    SKIPL(base + 3);
11413 		    ctxt->instate = XML_PARSER_CONTENT;
11414 		}
11415 		break;
11416 	    }
11417             case XML_PARSER_MISC:
11418             case XML_PARSER_PROLOG:
11419             case XML_PARSER_EPILOG:
11420 		SKIP_BLANKS;
11421                 avail = ctxt->input->end - ctxt->input->cur;
11422 		if (avail < 1)
11423 		    goto done;
11424 		if (ctxt->input->cur[0] == '<') {
11425                     if ((!terminate) && (avail < 2))
11426                         goto done;
11427                     next = ctxt->input->cur[1];
11428                     if (next == '?') {
11429                         if ((!terminate) &&
11430                             (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11431                             goto done;
11432                         xmlParsePI(ctxt);
11433                         break;
11434                     } else if (next == '!') {
11435                         if ((!terminate) && (avail < 3))
11436                             goto done;
11437 
11438                         if (ctxt->input->cur[2] == '-') {
11439                             if ((!terminate) && (avail < 4))
11440                                 goto done;
11441                             if (ctxt->input->cur[3] == '-') {
11442                                 if ((!terminate) &&
11443                                     (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11444                                     goto done;
11445                                 xmlParseComment(ctxt);
11446                                 break;
11447                             }
11448                         } else if (ctxt->instate == XML_PARSER_MISC) {
11449                             if ((!terminate) && (avail < 9))
11450                                 goto done;
11451                             if ((ctxt->input->cur[2] == 'D') &&
11452                                 (ctxt->input->cur[3] == 'O') &&
11453                                 (ctxt->input->cur[4] == 'C') &&
11454                                 (ctxt->input->cur[5] == 'T') &&
11455                                 (ctxt->input->cur[6] == 'Y') &&
11456                                 (ctxt->input->cur[7] == 'P') &&
11457                                 (ctxt->input->cur[8] == 'E')) {
11458                                 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11459                                     goto done;
11460                                 ctxt->inSubset = 1;
11461                                 xmlParseDocTypeDecl(ctxt);
11462                                 if (RAW == '[') {
11463                                     ctxt->instate = XML_PARSER_DTD;
11464                                 } else {
11465                                     /*
11466                                      * Create and update the external subset.
11467                                      */
11468                                     ctxt->inSubset = 2;
11469                                     if ((ctxt->sax != NULL) &&
11470                                         (!ctxt->disableSAX) &&
11471                                         (ctxt->sax->externalSubset != NULL))
11472                                         ctxt->sax->externalSubset(
11473                                                 ctxt->userData,
11474                                                 ctxt->intSubName,
11475                                                 ctxt->extSubSystem,
11476                                                 ctxt->extSubURI);
11477                                     ctxt->inSubset = 0;
11478                                     xmlCleanSpecialAttr(ctxt);
11479                                     ctxt->instate = XML_PARSER_PROLOG;
11480                                 }
11481                                 break;
11482                             }
11483                         }
11484                     }
11485                 }
11486 
11487                 if (ctxt->instate == XML_PARSER_EPILOG) {
11488                     if (ctxt->errNo == XML_ERR_OK)
11489                         xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11490 		    ctxt->instate = XML_PARSER_EOF;
11491                     xmlFinishDocument(ctxt);
11492                 } else {
11493 		    ctxt->instate = XML_PARSER_START_TAG;
11494 		}
11495 		break;
11496             case XML_PARSER_DTD: {
11497                 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11498                     goto done;
11499 		xmlParseInternalSubset(ctxt);
11500 		ctxt->inSubset = 2;
11501 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11502 		    (ctxt->sax->externalSubset != NULL))
11503 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11504 			    ctxt->extSubSystem, ctxt->extSubURI);
11505 		ctxt->inSubset = 0;
11506 		xmlCleanSpecialAttr(ctxt);
11507 		ctxt->instate = XML_PARSER_PROLOG;
11508                 break;
11509 	    }
11510             default:
11511                 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11512 			"PP: internal error\n");
11513 		ctxt->instate = XML_PARSER_EOF;
11514 		break;
11515 	}
11516     }
11517 done:
11518     return(ret);
11519 encoding_error:
11520     /* Only report the first error */
11521     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
11522         xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
11523         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
11524     }
11525     return(0);
11526 }
11527 
11528 /**
11529  * xmlParseChunk:
11530  * @ctxt:  an XML parser context
11531  * @chunk:  chunk of memory
11532  * @size:  size of chunk in bytes
11533  * @terminate:  last chunk indicator
11534  *
11535  * Parse a chunk of memory in push parser mode.
11536  *
11537  * Assumes that the parser context was initialized with
11538  * xmlCreatePushParserCtxt.
11539  *
11540  * The last chunk, which will often be empty, must be marked with
11541  * the @terminate flag. With the default SAX callbacks, the resulting
11542  * document will be available in ctxt->myDoc. This pointer will not
11543  * be freed by the library.
11544  *
11545  * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11546  * The push parser doesn't support recovery mode.
11547  *
11548  * Returns an xmlParserErrors code (0 on success).
11549  */
11550 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11551 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11552               int terminate) {
11553     size_t curBase;
11554     size_t maxLength;
11555     int end_in_lf = 0;
11556 
11557     if ((ctxt == NULL) || (size < 0))
11558         return(XML_ERR_ARGUMENT);
11559     if (ctxt->disableSAX != 0)
11560         return(ctxt->errNo);
11561     if (ctxt->input == NULL)
11562         return(XML_ERR_INTERNAL_ERROR);
11563 
11564     ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11565     if (ctxt->instate == XML_PARSER_START)
11566         xmlCtxtInitializeLate(ctxt);
11567     if ((size > 0) && (chunk != NULL) && (!terminate) &&
11568         (chunk[size - 1] == '\r')) {
11569 	end_in_lf = 1;
11570 	size--;
11571     }
11572 
11573     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11574         (ctxt->input->buf != NULL))  {
11575 	size_t pos = ctxt->input->cur - ctxt->input->base;
11576 	int res;
11577 
11578 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11579         xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11580 	if (res < 0) {
11581             xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11582 	    xmlHaltParser(ctxt);
11583 	    return(ctxt->errNo);
11584 	}
11585     }
11586 
11587     xmlParseTryOrFinish(ctxt, terminate);
11588 
11589     curBase = ctxt->input->cur - ctxt->input->base;
11590     maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11591                 XML_MAX_HUGE_LENGTH :
11592                 XML_MAX_LOOKUP_LIMIT;
11593     if (curBase > maxLength) {
11594         xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11595                     "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11596         xmlHaltParser(ctxt);
11597     }
11598 
11599     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11600         return(ctxt->errNo);
11601 
11602     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11603         (ctxt->input->buf != NULL)) {
11604 	size_t pos = ctxt->input->cur - ctxt->input->base;
11605         int res;
11606 
11607 	res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11608 	xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11609         if (res < 0) {
11610             xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11611             xmlHaltParser(ctxt);
11612             return(ctxt->errNo);
11613         }
11614     }
11615     if (terminate) {
11616 	/*
11617 	 * Check for termination
11618 	 */
11619         if ((ctxt->instate != XML_PARSER_EOF) &&
11620             (ctxt->instate != XML_PARSER_EPILOG)) {
11621             if (ctxt->nameNr > 0) {
11622                 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11623                 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11624                 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11625                         "Premature end of data in tag %s line %d\n",
11626                         name, line, NULL);
11627             } else if (ctxt->instate == XML_PARSER_START) {
11628                 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11629             } else {
11630                 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11631                                "Start tag expected, '<' not found\n");
11632             }
11633         } else if ((ctxt->input->buf != NULL) &&
11634                    (ctxt->input->buf->encoder != NULL) &&
11635                    (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11636             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11637                            "Truncated multi-byte sequence at EOF\n");
11638         }
11639 	if (ctxt->instate != XML_PARSER_EOF) {
11640             ctxt->instate = XML_PARSER_EOF;
11641             xmlFinishDocument(ctxt);
11642 	}
11643     }
11644     if (ctxt->wellFormed == 0)
11645 	return((xmlParserErrors) ctxt->errNo);
11646     else
11647         return(0);
11648 }
11649 
11650 /************************************************************************
11651  *									*
11652  *		I/O front end functions to the parser			*
11653  *									*
11654  ************************************************************************/
11655 
11656 /**
11657  * xmlCreatePushParserCtxt:
11658  * @sax:  a SAX handler (optional)
11659  * @user_data:  user data for SAX callbacks (optional)
11660  * @chunk:  initial chunk (optional, deprecated)
11661  * @size:  size of initial chunk in bytes
11662  * @filename:  file name or URI (optional)
11663  *
11664  * Create a parser context for using the XML parser in push mode.
11665  * See xmlParseChunk.
11666  *
11667  * Passing an initial chunk is useless and deprecated.
11668  *
11669  * @filename is used as base URI to fetch external entities and for
11670  * error reports.
11671  *
11672  * Returns the new parser context or NULL in case of error.
11673  */
11674 
11675 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11676 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11677                         const char *chunk, int size, const char *filename) {
11678     xmlParserCtxtPtr ctxt;
11679     xmlParserInputPtr input;
11680 
11681     ctxt = xmlNewSAXParserCtxt(sax, user_data);
11682     if (ctxt == NULL)
11683 	return(NULL);
11684 
11685     ctxt->options &= ~XML_PARSE_NODICT;
11686     ctxt->dictNames = 1;
11687 
11688     input = xmlNewInputPush(ctxt, filename, chunk, size, NULL);
11689     if (input == NULL) {
11690 	xmlFreeParserCtxt(ctxt);
11691 	return(NULL);
11692     }
11693     inputPush(ctxt, input);
11694 
11695     return(ctxt);
11696 }
11697 #endif /* LIBXML_PUSH_ENABLED */
11698 
11699 /**
11700  * xmlStopParser:
11701  * @ctxt:  an XML parser context
11702  *
11703  * Blocks further parser processing
11704  */
11705 void
xmlStopParser(xmlParserCtxtPtr ctxt)11706 xmlStopParser(xmlParserCtxtPtr ctxt) {
11707     if (ctxt == NULL)
11708         return;
11709     xmlHaltParser(ctxt);
11710     if (ctxt->errNo != XML_ERR_NO_MEMORY)
11711         ctxt->errNo = XML_ERR_USER_STOP;
11712 }
11713 
11714 /**
11715  * xmlCreateIOParserCtxt:
11716  * @sax:  a SAX handler (optional)
11717  * @user_data:  user data for SAX callbacks (optional)
11718  * @ioread:  an I/O read function
11719  * @ioclose:  an I/O close function (optional)
11720  * @ioctx:  an I/O handler
11721  * @enc:  the charset encoding if known (deprecated)
11722  *
11723  * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadIO.
11724  *
11725  * Create a parser context for using the XML parser with an existing
11726  * I/O stream
11727  *
11728  * Returns the new parser context or NULL
11729  */
11730 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11731 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11732                       xmlInputReadCallback ioread,
11733                       xmlInputCloseCallback ioclose,
11734                       void *ioctx, xmlCharEncoding enc) {
11735     xmlParserCtxtPtr ctxt;
11736     xmlParserInputPtr input;
11737     const char *encoding;
11738 
11739     ctxt = xmlNewSAXParserCtxt(sax, user_data);
11740     if (ctxt == NULL)
11741 	return(NULL);
11742 
11743     encoding = xmlGetCharEncodingName(enc);
11744     input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0);
11745     if (input == NULL) {
11746 	xmlFreeParserCtxt(ctxt);
11747         return (NULL);
11748     }
11749     inputPush(ctxt, input);
11750 
11751     return(ctxt);
11752 }
11753 
11754 #ifdef LIBXML_VALID_ENABLED
11755 /************************************************************************
11756  *									*
11757  *		Front ends when parsing a DTD				*
11758  *									*
11759  ************************************************************************/
11760 
11761 /**
11762  * xmlIOParseDTD:
11763  * @sax:  the SAX handler block or NULL
11764  * @input:  an Input Buffer
11765  * @enc:  the charset encoding if known
11766  *
11767  * Load and parse a DTD
11768  *
11769  * Returns the resulting xmlDtdPtr or NULL in case of error.
11770  * @input will be freed by the function in any case.
11771  */
11772 
11773 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)11774 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11775 	      xmlCharEncoding enc) {
11776     xmlDtdPtr ret = NULL;
11777     xmlParserCtxtPtr ctxt;
11778     xmlParserInputPtr pinput = NULL;
11779 
11780     if (input == NULL)
11781 	return(NULL);
11782 
11783     ctxt = xmlNewSAXParserCtxt(sax, NULL);
11784     if (ctxt == NULL) {
11785         xmlFreeParserInputBuffer(input);
11786 	return(NULL);
11787     }
11788 
11789     /*
11790      * generate a parser input from the I/O handler
11791      */
11792 
11793     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11794     if (pinput == NULL) {
11795         xmlFreeParserInputBuffer(input);
11796 	xmlFreeParserCtxt(ctxt);
11797 	return(NULL);
11798     }
11799 
11800     /*
11801      * plug some encoding conversion routines here.
11802      */
11803     if (xmlPushInput(ctxt, pinput) < 0) {
11804 	xmlFreeParserCtxt(ctxt);
11805 	return(NULL);
11806     }
11807     if (enc != XML_CHAR_ENCODING_NONE) {
11808         xmlSwitchEncoding(ctxt, enc);
11809     }
11810 
11811     /*
11812      * let's parse that entity knowing it's an external subset.
11813      */
11814     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11815     if (ctxt->myDoc == NULL) {
11816 	xmlErrMemory(ctxt);
11817 	return(NULL);
11818     }
11819     ctxt->myDoc->properties = XML_DOC_INTERNAL;
11820     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11821 	                               BAD_CAST "none", BAD_CAST "none");
11822 
11823     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11824 
11825     if (ctxt->myDoc != NULL) {
11826 	if (ctxt->wellFormed) {
11827 	    ret = ctxt->myDoc->extSubset;
11828 	    ctxt->myDoc->extSubset = NULL;
11829 	    if (ret != NULL) {
11830 		xmlNodePtr tmp;
11831 
11832 		ret->doc = NULL;
11833 		tmp = ret->children;
11834 		while (tmp != NULL) {
11835 		    tmp->doc = NULL;
11836 		    tmp = tmp->next;
11837 		}
11838 	    }
11839 	} else {
11840 	    ret = NULL;
11841 	}
11842         xmlFreeDoc(ctxt->myDoc);
11843         ctxt->myDoc = NULL;
11844     }
11845     xmlFreeParserCtxt(ctxt);
11846 
11847     return(ret);
11848 }
11849 
11850 /**
11851  * xmlSAXParseDTD:
11852  * @sax:  the SAX handler block
11853  * @ExternalID:  a NAME* containing the External ID of the DTD
11854  * @SystemID:  a NAME* containing the URL to the DTD
11855  *
11856  * DEPRECATED: Don't use.
11857  *
11858  * Load and parse an external subset.
11859  *
11860  * Returns the resulting xmlDtdPtr or NULL in case of error.
11861  */
11862 
11863 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)11864 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11865                           const xmlChar *SystemID) {
11866     xmlDtdPtr ret = NULL;
11867     xmlParserCtxtPtr ctxt;
11868     xmlParserInputPtr input = NULL;
11869     xmlChar* systemIdCanonic;
11870 
11871     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11872 
11873     ctxt = xmlNewSAXParserCtxt(sax, NULL);
11874     if (ctxt == NULL) {
11875 	return(NULL);
11876     }
11877 
11878     /*
11879      * Canonicalise the system ID
11880      */
11881     systemIdCanonic = xmlCanonicPath(SystemID);
11882     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11883 	xmlFreeParserCtxt(ctxt);
11884 	return(NULL);
11885     }
11886 
11887     /*
11888      * Ask the Entity resolver to load the damn thing
11889      */
11890 
11891     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11892 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11893 	                                 systemIdCanonic);
11894     if (input == NULL) {
11895 	xmlFreeParserCtxt(ctxt);
11896 	if (systemIdCanonic != NULL)
11897 	    xmlFree(systemIdCanonic);
11898 	return(NULL);
11899     }
11900 
11901     /*
11902      * plug some encoding conversion routines here.
11903      */
11904     if (xmlPushInput(ctxt, input) < 0) {
11905 	xmlFreeParserCtxt(ctxt);
11906 	if (systemIdCanonic != NULL)
11907 	    xmlFree(systemIdCanonic);
11908 	return(NULL);
11909     }
11910 
11911     xmlDetectEncoding(ctxt);
11912 
11913     if (input->filename == NULL)
11914 	input->filename = (char *) systemIdCanonic;
11915     else
11916 	xmlFree(systemIdCanonic);
11917 
11918     /*
11919      * let's parse that entity knowing it's an external subset.
11920      */
11921     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11922     if (ctxt->myDoc == NULL) {
11923 	xmlErrMemory(ctxt);
11924 	xmlFreeParserCtxt(ctxt);
11925 	return(NULL);
11926     }
11927     ctxt->myDoc->properties = XML_DOC_INTERNAL;
11928     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11929 	                               ExternalID, SystemID);
11930     if (ctxt->myDoc->extSubset == NULL) {
11931         xmlFreeDoc(ctxt->myDoc);
11932         xmlFreeParserCtxt(ctxt);
11933         return(NULL);
11934     }
11935     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11936 
11937     if (ctxt->myDoc != NULL) {
11938 	if (ctxt->wellFormed) {
11939 	    ret = ctxt->myDoc->extSubset;
11940 	    ctxt->myDoc->extSubset = NULL;
11941 	    if (ret != NULL) {
11942 		xmlNodePtr tmp;
11943 
11944 		ret->doc = NULL;
11945 		tmp = ret->children;
11946 		while (tmp != NULL) {
11947 		    tmp->doc = NULL;
11948 		    tmp = tmp->next;
11949 		}
11950 	    }
11951 	} else {
11952 	    ret = NULL;
11953 	}
11954         xmlFreeDoc(ctxt->myDoc);
11955         ctxt->myDoc = NULL;
11956     }
11957     xmlFreeParserCtxt(ctxt);
11958 
11959     return(ret);
11960 }
11961 
11962 
11963 /**
11964  * xmlParseDTD:
11965  * @ExternalID:  a NAME* containing the External ID of the DTD
11966  * @SystemID:  a NAME* containing the URL to the DTD
11967  *
11968  * Load and parse an external subset.
11969  *
11970  * Returns the resulting xmlDtdPtr or NULL in case of error.
11971  */
11972 
11973 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)11974 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11975     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11976 }
11977 #endif /* LIBXML_VALID_ENABLED */
11978 
11979 /************************************************************************
11980  *									*
11981  *		Front ends when parsing an Entity			*
11982  *									*
11983  ************************************************************************/
11984 
11985 static xmlNodePtr
xmlCtxtParseContent(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,int hasTextDecl,int buildTree)11986 xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11987                     int hasTextDecl, int buildTree) {
11988     xmlNodePtr root = NULL;
11989     xmlNodePtr list = NULL;
11990     xmlChar *rootName = BAD_CAST "#root";
11991     int result;
11992 
11993     if (buildTree) {
11994         root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11995         if (root == NULL) {
11996             xmlErrMemory(ctxt);
11997             goto error;
11998         }
11999     }
12000 
12001     if (xmlPushInput(ctxt, input) < 0)
12002         goto error;
12003 
12004     nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
12005     spacePush(ctxt, -1);
12006 
12007     if (buildTree)
12008         nodePush(ctxt, root);
12009 
12010     if (hasTextDecl) {
12011         xmlDetectEncoding(ctxt);
12012 
12013         /*
12014          * Parse a possible text declaration first
12015          */
12016         if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
12017             (IS_BLANK_CH(NXT(5)))) {
12018             xmlParseTextDecl(ctxt);
12019             /*
12020              * An XML-1.0 document can't reference an entity not XML-1.0
12021              */
12022             if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
12023                 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12024                 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12025                                "Version mismatch between document and "
12026                                "entity\n");
12027             }
12028         }
12029     }
12030 
12031     xmlParseContentInternal(ctxt);
12032 
12033     if (ctxt->input->cur < ctxt->input->end)
12034 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12035 
12036     if ((ctxt->wellFormed) ||
12037         ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12038         if (root != NULL) {
12039             xmlNodePtr cur;
12040 
12041             /*
12042              * Return the newly created nodeset after unlinking it from
12043              * its pseudo parent.
12044              */
12045             cur = root->children;
12046             list = cur;
12047             while (cur != NULL) {
12048                 cur->parent = NULL;
12049                 cur = cur->next;
12050             }
12051             root->children = NULL;
12052             root->last = NULL;
12053         }
12054     }
12055 
12056     /*
12057      * Read the rest of the stream in case of errors. We want
12058      * to account for the whole entity size.
12059      */
12060     do {
12061         ctxt->input->cur = ctxt->input->end;
12062         xmlParserShrink(ctxt);
12063         result = xmlParserGrow(ctxt);
12064     } while (result > 0);
12065 
12066     if (buildTree)
12067         nodePop(ctxt);
12068 
12069     namePop(ctxt);
12070     spacePop(ctxt);
12071 
12072     /* xmlPopInput would free the stream */
12073     inputPop(ctxt);
12074 
12075 error:
12076     xmlFreeNode(root);
12077 
12078     return(list);
12079 }
12080 
12081 static void
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)12082 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12083     xmlParserInputPtr input;
12084     xmlNodePtr list;
12085     unsigned long consumed;
12086     int isExternal;
12087     int buildTree;
12088     int oldMinNsIndex;
12089     int oldNodelen, oldNodemem;
12090 
12091     isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12092     buildTree = (ctxt->node != NULL);
12093 
12094     /*
12095      * Recursion check
12096      */
12097     if (ent->flags & XML_ENT_EXPANDING) {
12098         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12099         xmlHaltParser(ctxt);
12100         goto error;
12101     }
12102 
12103     /*
12104      * Load entity
12105      */
12106     input = xmlNewEntityInputStream(ctxt, ent);
12107     if (input == NULL)
12108         goto error;
12109 
12110     /*
12111      * When building a tree, we need to limit the scope of namespace
12112      * declarations, so that entities don't reference xmlNs structs
12113      * from the parent of a reference.
12114      */
12115     oldMinNsIndex = ctxt->nsdb->minNsIndex;
12116     if (buildTree)
12117         ctxt->nsdb->minNsIndex = ctxt->nsNr;
12118 
12119     oldNodelen = ctxt->nodelen;
12120     oldNodemem = ctxt->nodemem;
12121     ctxt->nodelen = 0;
12122     ctxt->nodemem = 0;
12123 
12124     /*
12125      * Parse content
12126      *
12127      * This initiates a recursive call chain:
12128      *
12129      * - xmlCtxtParseContent
12130      * - xmlParseContentInternal
12131      * - xmlParseReference
12132      * - xmlCtxtParseEntity
12133      *
12134      * The nesting depth is limited by the maximum number of inputs,
12135      * see xmlPushInput.
12136      *
12137      * It's possible to make this non-recursive (minNsIndex must be
12138      * stored in the input struct) at the expense of code readability.
12139      */
12140 
12141     ent->flags |= XML_ENT_EXPANDING;
12142 
12143     list = xmlCtxtParseContent(ctxt, input, isExternal, buildTree);
12144 
12145     ent->flags &= ~XML_ENT_EXPANDING;
12146 
12147     ctxt->nsdb->minNsIndex = oldMinNsIndex;
12148     ctxt->nodelen = oldNodelen;
12149     ctxt->nodemem = oldNodemem;
12150 
12151     /*
12152      * Entity size accounting
12153      */
12154     consumed = input->consumed;
12155     xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12156 
12157     if ((ent->flags & XML_ENT_CHECKED) == 0)
12158         xmlSaturatedAdd(&ent->expandedSize, consumed);
12159 
12160     if ((ent->flags & XML_ENT_PARSED) == 0) {
12161         if (isExternal)
12162             xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12163 
12164         ent->children = list;
12165 
12166         while (list != NULL) {
12167             list->parent = (xmlNodePtr) ent;
12168             if (list->next == NULL)
12169                 ent->last = list;
12170             list = list->next;
12171         }
12172     } else {
12173         xmlFreeNodeList(list);
12174     }
12175 
12176     xmlFreeInputStream(input);
12177 
12178 error:
12179     ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12180 }
12181 
12182 /**
12183  * xmlParseCtxtExternalEntity:
12184  * @ctx:  the existing parsing context
12185  * @URL:  the URL for the entity to load
12186  * @ID:  the System ID for the entity to load
12187  * @lst:  the return value for the set of parsed nodes
12188  *
12189  * Parse an external general entity within an existing parsing context
12190  * An external general parsed entity is well-formed if it matches the
12191  * production labeled extParsedEnt.
12192  *
12193  * [78] extParsedEnt ::= TextDecl? content
12194  *
12195  * Returns 0 if the entity is well formed, -1 in case of args problem and
12196  *    the parser error code otherwise
12197  */
12198 
12199 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * listOut)12200 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12201                            const xmlChar *ID, xmlNodePtr *listOut) {
12202     xmlParserInputPtr input;
12203     xmlNodePtr list;
12204 
12205     if (listOut != NULL)
12206         *listOut = NULL;
12207 
12208     if (ctxt == NULL)
12209         return(XML_ERR_ARGUMENT);
12210 
12211     input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12212     if (input == NULL)
12213         return(ctxt->errNo);
12214 
12215     xmlCtxtInitializeLate(ctxt);
12216 
12217     list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
12218     if (*listOut != NULL)
12219         *listOut = list;
12220     else
12221         xmlFreeNodeList(list);
12222 
12223     xmlFreeInputStream(input);
12224     return(ctxt->errNo);
12225 }
12226 
12227 #ifdef LIBXML_SAX1_ENABLED
12228 /**
12229  * xmlParseExternalEntity:
12230  * @doc:  the document the chunk pertains to
12231  * @sax:  the SAX handler block (possibly NULL)
12232  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12233  * @depth:  Used for loop detection, use 0
12234  * @URL:  the URL for the entity to load
12235  * @ID:  the System ID for the entity to load
12236  * @lst:  the return value for the set of parsed nodes
12237  *
12238  * Parse an external general entity
12239  * An external general parsed entity is well-formed if it matches the
12240  * production labeled extParsedEnt.
12241  *
12242  * [78] extParsedEnt ::= TextDecl? content
12243  *
12244  * Returns 0 if the entity is well formed, -1 in case of args problem and
12245  *    the parser error code otherwise
12246  */
12247 
12248 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12249 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12250 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12251     xmlParserCtxtPtr ctxt;
12252     int ret;
12253 
12254     if (list != NULL)
12255         *list = NULL;
12256 
12257     if (doc == NULL)
12258         return(XML_ERR_ARGUMENT);
12259 
12260     ctxt = xmlNewSAXParserCtxt(sax, user_data);
12261     if (ctxt == NULL)
12262         return(XML_ERR_NO_MEMORY);
12263 
12264     ctxt->depth = depth;
12265     ctxt->myDoc = doc;
12266     ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12267 
12268     xmlFreeParserCtxt(ctxt);
12269     return(ret);
12270 }
12271 
12272 /**
12273  * xmlParseBalancedChunkMemory:
12274  * @doc:  the document the chunk pertains to (must not be NULL)
12275  * @sax:  the SAX handler block (possibly NULL)
12276  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12277  * @depth:  Used for loop detection, use 0
12278  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12279  * @lst:  the return value for the set of parsed nodes
12280  *
12281  * Parse a well-balanced chunk of an XML document
12282  * called by the parser
12283  * The allowed sequence for the Well Balanced Chunk is the one defined by
12284  * the content production in the XML grammar:
12285  *
12286  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12287  *
12288  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12289  *    the parser error code otherwise
12290  */
12291 
12292 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12293 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12294      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12295     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12296                                                 depth, string, lst, 0 );
12297 }
12298 #endif /* LIBXML_SAX1_ENABLED */
12299 
12300 /**
12301  * xmlParseInNodeContext:
12302  * @node:  the context node
12303  * @data:  the input string
12304  * @datalen:  the input string length in bytes
12305  * @options:  a combination of xmlParserOption
12306  * @lst:  the return value for the set of parsed nodes
12307  *
12308  * Parse a well-balanced chunk of an XML document
12309  * within the context (DTD, namespaces, etc ...) of the given node.
12310  *
12311  * The allowed sequence for the data is a Well Balanced Chunk defined by
12312  * the content production in the XML grammar:
12313  *
12314  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12315  *
12316  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12317  * error code otherwise
12318  */
12319 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12320 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12321                       int options, xmlNodePtr *lst) {
12322     xmlParserCtxtPtr ctxt;
12323     xmlDocPtr doc = NULL;
12324     xmlNodePtr fake, cur;
12325     int nsnr = 0;
12326 
12327     xmlParserErrors ret = XML_ERR_OK;
12328 
12329     /*
12330      * check all input parameters, grab the document
12331      */
12332     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12333         return(XML_ERR_ARGUMENT);
12334     switch (node->type) {
12335         case XML_ELEMENT_NODE:
12336         case XML_ATTRIBUTE_NODE:
12337         case XML_TEXT_NODE:
12338         case XML_CDATA_SECTION_NODE:
12339         case XML_ENTITY_REF_NODE:
12340         case XML_PI_NODE:
12341         case XML_COMMENT_NODE:
12342         case XML_DOCUMENT_NODE:
12343         case XML_HTML_DOCUMENT_NODE:
12344 	    break;
12345 	default:
12346 	    return(XML_ERR_INTERNAL_ERROR);
12347 
12348     }
12349     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12350            (node->type != XML_DOCUMENT_NODE) &&
12351 	   (node->type != XML_HTML_DOCUMENT_NODE))
12352 	node = node->parent;
12353     if (node == NULL)
12354 	return(XML_ERR_INTERNAL_ERROR);
12355     if (node->type == XML_ELEMENT_NODE)
12356 	doc = node->doc;
12357     else
12358         doc = (xmlDocPtr) node;
12359     if (doc == NULL)
12360 	return(XML_ERR_INTERNAL_ERROR);
12361 
12362     /*
12363      * allocate a context and set-up everything not related to the
12364      * node position in the tree
12365      */
12366     if (doc->type == XML_DOCUMENT_NODE)
12367 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12368 #ifdef LIBXML_HTML_ENABLED
12369     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12370 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12371         /*
12372          * When parsing in context, it makes no sense to add implied
12373          * elements like html/body/etc...
12374          */
12375         options |= HTML_PARSE_NOIMPLIED;
12376     }
12377 #endif
12378     else
12379         return(XML_ERR_INTERNAL_ERROR);
12380 
12381     if (ctxt == NULL)
12382         return(XML_ERR_NO_MEMORY);
12383 
12384     /*
12385      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12386      * We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
12387      * we must wait until the last moment to free the original one.
12388      */
12389     if (doc->dict != NULL) {
12390         if (ctxt->dict != NULL)
12391 	    xmlDictFree(ctxt->dict);
12392 	ctxt->dict = doc->dict;
12393     } else {
12394         options |= XML_PARSE_NODICT;
12395         ctxt->dictNames = 0;
12396     }
12397 
12398     if (doc->encoding != NULL)
12399         xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
12400 
12401     xmlCtxtUseOptions(ctxt, options);
12402     xmlCtxtInitializeLate(ctxt);
12403     ctxt->myDoc = doc;
12404     /* parsing in context, i.e. as within existing content */
12405     ctxt->input_id = 2;
12406 
12407     /*
12408      * TODO: Use xmlCtxtParseContent
12409      */
12410 
12411     fake = xmlNewDocComment(node->doc, NULL);
12412     if (fake == NULL) {
12413         xmlFreeParserCtxt(ctxt);
12414 	return(XML_ERR_NO_MEMORY);
12415     }
12416     xmlAddChild(node, fake);
12417 
12418     if (node->type == XML_ELEMENT_NODE)
12419 	nodePush(ctxt, node);
12420 
12421     if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
12422 	/*
12423 	 * initialize the SAX2 namespaces stack
12424 	 */
12425 	cur = node;
12426 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12427 	    xmlNsPtr ns = cur->nsDef;
12428             xmlHashedString hprefix, huri;
12429 
12430 	    while (ns != NULL) {
12431                 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12432                 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12433                 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12434                     nsnr++;
12435 		ns = ns->next;
12436 	    }
12437 	    cur = cur->parent;
12438 	}
12439     }
12440 
12441     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12442 	/*
12443 	 * ID/IDREF registration will be done in xmlValidateElement below
12444 	 */
12445 	ctxt->loadsubset |= XML_SKIP_IDS;
12446     }
12447 
12448 #ifdef LIBXML_HTML_ENABLED
12449     if (doc->type == XML_HTML_DOCUMENT_NODE)
12450         __htmlParseContent(ctxt);
12451     else
12452 #endif
12453 	xmlParseContentInternal(ctxt);
12454 
12455     if (ctxt->input->cur < ctxt->input->end)
12456 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12457 
12458     xmlParserNsPop(ctxt, nsnr);
12459 
12460     if ((ctxt->wellFormed) ||
12461         ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12462         ret = XML_ERR_OK;
12463     } else {
12464 	ret = (xmlParserErrors) ctxt->errNo;
12465     }
12466 
12467     /*
12468      * Return the newly created nodeset after unlinking it from
12469      * the pseudo sibling.
12470      */
12471 
12472     cur = fake->next;
12473     fake->next = NULL;
12474     node->last = fake;
12475 
12476     if (cur != NULL) {
12477 	cur->prev = NULL;
12478     }
12479 
12480     *lst = cur;
12481 
12482     while (cur != NULL) {
12483 	cur->parent = NULL;
12484 	cur = cur->next;
12485     }
12486 
12487     xmlUnlinkNode(fake);
12488     xmlFreeNode(fake);
12489 
12490 
12491     if (ret != XML_ERR_OK) {
12492         xmlFreeNodeList(*lst);
12493 	*lst = NULL;
12494     }
12495 
12496     if (doc->dict != NULL)
12497         ctxt->dict = NULL;
12498     xmlFreeParserCtxt(ctxt);
12499 
12500     return(ret);
12501 }
12502 
12503 #ifdef LIBXML_SAX1_ENABLED
12504 /**
12505  * xmlParseBalancedChunkMemoryRecover:
12506  * @doc:  the document the chunk pertains to (must not be NULL)
12507  * @sax:  the SAX handler block (possibly NULL)
12508  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12509  * @depth:  Used for loop detection, use 0
12510  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12511  * @list:  the return value for the set of parsed nodes
12512  * @recover: return nodes even if the data is broken (use 0)
12513  *
12514  * Parse a well-balanced chunk of an XML document
12515  *
12516  * The allowed sequence for the Well Balanced Chunk is the one defined by
12517  * the content production in the XML grammar:
12518  *
12519  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12520  *
12521  * Returns 0 if the chunk is well balanced, or thehe parser error code
12522  * otherwise.
12523  *
12524  * In case recover is set to 1, the nodelist will not be empty even if
12525  * the parsed chunk is not well balanced, assuming the parsing succeeded to
12526  * some extent.
12527  */
12528 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * listOut,int recover)12529 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12530      void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12531      int recover) {
12532     xmlParserCtxtPtr ctxt;
12533     xmlParserInputPtr input;
12534     xmlNodePtr list;
12535     int ret;
12536 
12537     if (listOut != NULL)
12538         *listOut = NULL;
12539 
12540     if (string == NULL)
12541         return(XML_ERR_ARGUMENT);
12542 
12543     ctxt = xmlNewSAXParserCtxt(sax, user_data);
12544     if (ctxt == NULL)
12545         return(XML_ERR_NO_MEMORY);
12546 
12547     xmlCtxtInitializeLate(ctxt);
12548 
12549     ctxt->depth = depth;
12550     ctxt->myDoc = doc;
12551     if (recover) {
12552         ctxt->options |= XML_PARSE_RECOVER;
12553         ctxt->recovery = 1;
12554     }
12555 
12556     input = xmlNewStringInputStream(ctxt, string);
12557     if (input == NULL)
12558         return(ctxt->errNo);
12559 
12560     list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 0, 1);
12561     if (listOut != NULL)
12562         *listOut = list;
12563     else
12564         xmlFreeNodeList(list);
12565 
12566     ret = ctxt->errNo;
12567 
12568     xmlFreeInputStream(input);
12569     xmlFreeParserCtxt(ctxt);
12570     return(ret);
12571 }
12572 
12573 /**
12574  * xmlSAXParseEntity:
12575  * @sax:  the SAX handler block
12576  * @filename:  the filename
12577  *
12578  * DEPRECATED: Don't use.
12579  *
12580  * parse an XML external entity out of context and build a tree.
12581  * It use the given SAX function block to handle the parsing callback.
12582  * If sax is NULL, fallback to the default DOM tree building routines.
12583  *
12584  * [78] extParsedEnt ::= TextDecl? content
12585  *
12586  * This correspond to a "Well Balanced" chunk
12587  *
12588  * Returns the resulting document tree
12589  */
12590 
12591 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)12592 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12593     xmlDocPtr ret;
12594     xmlParserCtxtPtr ctxt;
12595 
12596     ctxt = xmlCreateFileParserCtxt(filename);
12597     if (ctxt == NULL) {
12598 	return(NULL);
12599     }
12600     if (sax != NULL) {
12601         if (sax->initialized == XML_SAX2_MAGIC) {
12602             *ctxt->sax = *sax;
12603         } else {
12604             memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12605             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12606         }
12607         ctxt->userData = NULL;
12608     }
12609 
12610     xmlParseExtParsedEnt(ctxt);
12611 
12612     if (ctxt->wellFormed) {
12613 	ret = ctxt->myDoc;
12614     } else {
12615         ret = NULL;
12616         xmlFreeDoc(ctxt->myDoc);
12617     }
12618 
12619     xmlFreeParserCtxt(ctxt);
12620 
12621     return(ret);
12622 }
12623 
12624 /**
12625  * xmlParseEntity:
12626  * @filename:  the filename
12627  *
12628  * parse an XML external entity out of context and build a tree.
12629  *
12630  * [78] extParsedEnt ::= TextDecl? content
12631  *
12632  * This correspond to a "Well Balanced" chunk
12633  *
12634  * Returns the resulting document tree
12635  */
12636 
12637 xmlDocPtr
xmlParseEntity(const char * filename)12638 xmlParseEntity(const char *filename) {
12639     return(xmlSAXParseEntity(NULL, filename));
12640 }
12641 #endif /* LIBXML_SAX1_ENABLED */
12642 
12643 /**
12644  * xmlCreateEntityParserCtxt:
12645  * @URL:  the entity URL
12646  * @ID:  the entity PUBLIC ID
12647  * @base:  a possible base for the target URI
12648  *
12649  * DEPRECATED: Use xmlNewInputURL.
12650  *
12651  * Create a parser context for an external entity
12652  * Automatic support for ZLIB/Compress compressed document is provided
12653  * by default if found at compile-time.
12654  *
12655  * Returns the new parser context or NULL
12656  */
12657 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)12658 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12659 	                  const xmlChar *base) {
12660     xmlParserCtxtPtr ctxt;
12661     xmlParserInputPtr input;
12662     xmlChar *uri = NULL;
12663 
12664     ctxt = xmlNewParserCtxt();
12665     if (ctxt == NULL)
12666 	return(NULL);
12667 
12668     if (base != NULL) {
12669         if (xmlBuildURISafe(URL, base, &uri) < 0)
12670             goto error;
12671         if (uri != NULL)
12672             URL = uri;
12673     }
12674 
12675     input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12676     if (input == NULL)
12677         goto error;
12678 
12679     if (inputPush(ctxt, input) < 0)
12680         goto error;
12681 
12682     xmlFree(uri);
12683     return(ctxt);
12684 
12685 error:
12686     xmlFree(uri);
12687     xmlFreeParserCtxt(ctxt);
12688     return(NULL);
12689 }
12690 
12691 /************************************************************************
12692  *									*
12693  *		Front ends when parsing from a file			*
12694  *									*
12695  ************************************************************************/
12696 
12697 /**
12698  * xmlCreateURLParserCtxt:
12699  * @filename:  the filename or URL
12700  * @options:  a combination of xmlParserOption
12701  *
12702  * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12703  *
12704  * Create a parser context for a file or URL content.
12705  * Automatic support for ZLIB/Compress compressed document is provided
12706  * by default if found at compile-time and for file accesses
12707  *
12708  * Returns the new parser context or NULL
12709  */
12710 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)12711 xmlCreateURLParserCtxt(const char *filename, int options)
12712 {
12713     xmlParserCtxtPtr ctxt;
12714     xmlParserInputPtr input;
12715 
12716     ctxt = xmlNewParserCtxt();
12717     if (ctxt == NULL)
12718 	return(NULL);
12719 
12720     xmlCtxtUseOptions(ctxt, options);
12721     ctxt->linenumbers = 1;
12722 
12723     input = xmlLoadExternalEntity(filename, NULL, ctxt);
12724     if (input == NULL) {
12725 	xmlFreeParserCtxt(ctxt);
12726 	return(NULL);
12727     }
12728     inputPush(ctxt, input);
12729 
12730     return(ctxt);
12731 }
12732 
12733 /**
12734  * xmlCreateFileParserCtxt:
12735  * @filename:  the filename
12736  *
12737  * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12738  *
12739  * Create a parser context for a file content.
12740  * Automatic support for ZLIB/Compress compressed document is provided
12741  * by default if found at compile-time.
12742  *
12743  * Returns the new parser context or NULL
12744  */
12745 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)12746 xmlCreateFileParserCtxt(const char *filename)
12747 {
12748     return(xmlCreateURLParserCtxt(filename, 0));
12749 }
12750 
12751 #ifdef LIBXML_SAX1_ENABLED
12752 /**
12753  * xmlSAXParseFileWithData:
12754  * @sax:  the SAX handler block
12755  * @filename:  the filename
12756  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12757  *             documents
12758  * @data:  the userdata
12759  *
12760  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12761  *
12762  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12763  * compressed document is provided by default if found at compile-time.
12764  * It use the given SAX function block to handle the parsing callback.
12765  * If sax is NULL, fallback to the default DOM tree building routines.
12766  *
12767  * User data (void *) is stored within the parser context in the
12768  * context's _private member, so it is available nearly everywhere in libxml
12769  *
12770  * Returns the resulting document tree
12771  */
12772 
12773 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)12774 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12775                         int recovery, void *data) {
12776     xmlDocPtr ret;
12777     xmlParserCtxtPtr ctxt;
12778     xmlParserInputPtr input;
12779 
12780     ctxt = xmlNewSAXParserCtxt(sax, NULL);
12781     if (ctxt == NULL)
12782 	return(NULL);
12783 
12784     if (data != NULL)
12785 	ctxt->_private = data;
12786 
12787     if (recovery) {
12788         ctxt->options |= XML_PARSE_RECOVER;
12789         ctxt->recovery = 1;
12790     }
12791 
12792     input = xmlNewInputURL(ctxt, filename, NULL, NULL, 0);
12793 
12794     ret = xmlCtxtParseDocument(ctxt, input);
12795 
12796     xmlFreeParserCtxt(ctxt);
12797     return(ret);
12798 }
12799 
12800 /**
12801  * xmlSAXParseFile:
12802  * @sax:  the SAX handler block
12803  * @filename:  the filename
12804  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12805  *             documents
12806  *
12807  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12808  *
12809  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12810  * compressed document is provided by default if found at compile-time.
12811  * It use the given SAX function block to handle the parsing callback.
12812  * If sax is NULL, fallback to the default DOM tree building routines.
12813  *
12814  * Returns the resulting document tree
12815  */
12816 
12817 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)12818 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12819                           int recovery) {
12820     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12821 }
12822 
12823 /**
12824  * xmlRecoverDoc:
12825  * @cur:  a pointer to an array of xmlChar
12826  *
12827  * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12828  *
12829  * parse an XML in-memory document and build a tree.
12830  * In the case the document is not Well Formed, a attempt to build a
12831  * tree is tried anyway
12832  *
12833  * Returns the resulting document tree or NULL in case of failure
12834  */
12835 
12836 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)12837 xmlRecoverDoc(const xmlChar *cur) {
12838     return(xmlSAXParseDoc(NULL, cur, 1));
12839 }
12840 
12841 /**
12842  * xmlParseFile:
12843  * @filename:  the filename
12844  *
12845  * DEPRECATED: Use xmlReadFile.
12846  *
12847  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12848  * compressed document is provided by default if found at compile-time.
12849  *
12850  * Returns the resulting document tree if the file was wellformed,
12851  * NULL otherwise.
12852  */
12853 
12854 xmlDocPtr
xmlParseFile(const char * filename)12855 xmlParseFile(const char *filename) {
12856     return(xmlSAXParseFile(NULL, filename, 0));
12857 }
12858 
12859 /**
12860  * xmlRecoverFile:
12861  * @filename:  the filename
12862  *
12863  * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12864  *
12865  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12866  * compressed document is provided by default if found at compile-time.
12867  * In the case the document is not Well Formed, it attempts to build
12868  * a tree anyway
12869  *
12870  * Returns the resulting document tree or NULL in case of failure
12871  */
12872 
12873 xmlDocPtr
xmlRecoverFile(const char * filename)12874 xmlRecoverFile(const char *filename) {
12875     return(xmlSAXParseFile(NULL, filename, 1));
12876 }
12877 
12878 
12879 /**
12880  * xmlSetupParserForBuffer:
12881  * @ctxt:  an XML parser context
12882  * @buffer:  a xmlChar * buffer
12883  * @filename:  a file name
12884  *
12885  * DEPRECATED: Don't use.
12886  *
12887  * Setup the parser context to parse a new buffer; Clears any prior
12888  * contents from the parser context. The buffer parameter must not be
12889  * NULL, but the filename parameter can be
12890  */
12891 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)12892 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12893                              const char* filename)
12894 {
12895     xmlParserInputPtr input;
12896 
12897     if ((ctxt == NULL) || (buffer == NULL))
12898         return;
12899 
12900     xmlClearParserCtxt(ctxt);
12901 
12902     input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0);
12903     if (input == NULL)
12904         return;
12905     inputPush(ctxt, input);
12906 }
12907 
12908 /**
12909  * xmlSAXUserParseFile:
12910  * @sax:  a SAX handler
12911  * @user_data:  The user data returned on SAX callbacks
12912  * @filename:  a file name
12913  *
12914  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12915  *
12916  * parse an XML file and call the given SAX handler routines.
12917  * Automatic support for ZLIB/Compress compressed document is provided
12918  *
12919  * Returns 0 in case of success or a error number otherwise
12920  */
12921 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)12922 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12923                     const char *filename) {
12924     int ret = 0;
12925     xmlParserCtxtPtr ctxt;
12926 
12927     ctxt = xmlCreateFileParserCtxt(filename);
12928     if (ctxt == NULL) return -1;
12929     if (sax != NULL) {
12930         if (sax->initialized == XML_SAX2_MAGIC) {
12931             *ctxt->sax = *sax;
12932         } else {
12933             memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12934             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12935         }
12936 	ctxt->userData = user_data;
12937     }
12938 
12939     xmlParseDocument(ctxt);
12940 
12941     if (ctxt->wellFormed)
12942 	ret = 0;
12943     else {
12944         if (ctxt->errNo != 0)
12945 	    ret = ctxt->errNo;
12946 	else
12947 	    ret = -1;
12948     }
12949     if (ctxt->myDoc != NULL) {
12950         xmlFreeDoc(ctxt->myDoc);
12951 	ctxt->myDoc = NULL;
12952     }
12953     xmlFreeParserCtxt(ctxt);
12954 
12955     return ret;
12956 }
12957 #endif /* LIBXML_SAX1_ENABLED */
12958 
12959 /************************************************************************
12960  *									*
12961  *		Front ends when parsing from memory			*
12962  *									*
12963  ************************************************************************/
12964 
12965 /**
12966  * xmlCreateMemoryParserCtxt:
12967  * @buffer:  a pointer to a char array
12968  * @size:  the size of the array
12969  *
12970  * Create a parser context for an XML in-memory document. The input buffer
12971  * must not contain a terminating null byte.
12972  *
12973  * Returns the new parser context or NULL
12974  */
12975 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)12976 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12977     xmlParserCtxtPtr ctxt;
12978     xmlParserInputPtr input;
12979 
12980     if (size < 0)
12981 	return(NULL);
12982 
12983     ctxt = xmlNewParserCtxt();
12984     if (ctxt == NULL)
12985 	return(NULL);
12986 
12987     input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0);
12988     if (input == NULL) {
12989 	xmlFreeParserCtxt(ctxt);
12990 	return(NULL);
12991     }
12992     inputPush(ctxt, input);
12993 
12994     return(ctxt);
12995 }
12996 
12997 #ifdef LIBXML_SAX1_ENABLED
12998 /**
12999  * xmlSAXParseMemoryWithData:
13000  * @sax:  the SAX handler block
13001  * @buffer:  an pointer to a char array
13002  * @size:  the size of the array
13003  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13004  *             documents
13005  * @data:  the userdata
13006  *
13007  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13008  *
13009  * parse an XML in-memory block and use the given SAX function block
13010  * to handle the parsing callback. If sax is NULL, fallback to the default
13011  * DOM tree building routines.
13012  *
13013  * User data (void *) is stored within the parser context in the
13014  * context's _private member, so it is available nearly everywhere in libxml
13015  *
13016  * Returns the resulting document tree
13017  */
13018 
13019 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)13020 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13021                           int size, int recovery, void *data) {
13022     xmlDocPtr ret;
13023     xmlParserCtxtPtr ctxt;
13024     xmlParserInputPtr input;
13025 
13026     if (size < 0)
13027         return(NULL);
13028 
13029     ctxt = xmlNewSAXParserCtxt(sax, NULL);
13030     if (ctxt == NULL)
13031         return(NULL);
13032 
13033     if (data != NULL)
13034 	ctxt->_private=data;
13035 
13036     if (recovery) {
13037         ctxt->options |= XML_PARSE_RECOVER;
13038         ctxt->recovery = 1;
13039     }
13040 
13041     input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL,
13042                               XML_INPUT_BUF_STATIC);
13043 
13044     ret = xmlCtxtParseDocument(ctxt, input);
13045 
13046     xmlFreeParserCtxt(ctxt);
13047     return(ret);
13048 }
13049 
13050 /**
13051  * xmlSAXParseMemory:
13052  * @sax:  the SAX handler block
13053  * @buffer:  an pointer to a char array
13054  * @size:  the size of the array
13055  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13056  *             documents
13057  *
13058  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13059  *
13060  * parse an XML in-memory block and use the given SAX function block
13061  * to handle the parsing callback. If sax is NULL, fallback to the default
13062  * DOM tree building routines.
13063  *
13064  * Returns the resulting document tree
13065  */
13066 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13067 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13068 	          int size, int recovery) {
13069     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13070 }
13071 
13072 /**
13073  * xmlParseMemory:
13074  * @buffer:  an pointer to a char array
13075  * @size:  the size of the array
13076  *
13077  * DEPRECATED: Use xmlReadMemory.
13078  *
13079  * parse an XML in-memory block and build a tree.
13080  *
13081  * Returns the resulting document tree
13082  */
13083 
xmlParseMemory(const char * buffer,int size)13084 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13085    return(xmlSAXParseMemory(NULL, buffer, size, 0));
13086 }
13087 
13088 /**
13089  * xmlRecoverMemory:
13090  * @buffer:  an pointer to a char array
13091  * @size:  the size of the array
13092  *
13093  * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13094  *
13095  * parse an XML in-memory block and build a tree.
13096  * In the case the document is not Well Formed, an attempt to
13097  * build a tree is tried anyway
13098  *
13099  * Returns the resulting document tree or NULL in case of error
13100  */
13101 
xmlRecoverMemory(const char * buffer,int size)13102 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13103    return(xmlSAXParseMemory(NULL, buffer, size, 1));
13104 }
13105 
13106 /**
13107  * xmlSAXUserParseMemory:
13108  * @sax:  a SAX handler
13109  * @user_data:  The user data returned on SAX callbacks
13110  * @buffer:  an in-memory XML document input
13111  * @size:  the length of the XML document in bytes
13112  *
13113  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13114  *
13115  * parse an XML in-memory buffer and call the given SAX handler routines.
13116  *
13117  * Returns 0 in case of success or a error number otherwise
13118  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13119 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13120 			  const char *buffer, int size) {
13121     int ret = 0;
13122     xmlParserCtxtPtr ctxt;
13123 
13124     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13125     if (ctxt == NULL) return -1;
13126     if (sax != NULL) {
13127         if (sax->initialized == XML_SAX2_MAGIC) {
13128             *ctxt->sax = *sax;
13129         } else {
13130             memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13131             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13132         }
13133 	ctxt->userData = user_data;
13134     }
13135 
13136     xmlParseDocument(ctxt);
13137 
13138     if (ctxt->wellFormed)
13139 	ret = 0;
13140     else {
13141         if (ctxt->errNo != 0)
13142 	    ret = ctxt->errNo;
13143 	else
13144 	    ret = -1;
13145     }
13146     if (ctxt->myDoc != NULL) {
13147         xmlFreeDoc(ctxt->myDoc);
13148 	ctxt->myDoc = NULL;
13149     }
13150     xmlFreeParserCtxt(ctxt);
13151 
13152     return ret;
13153 }
13154 #endif /* LIBXML_SAX1_ENABLED */
13155 
13156 /**
13157  * xmlCreateDocParserCtxt:
13158  * @str:  a pointer to an array of xmlChar
13159  *
13160  * Creates a parser context for an XML in-memory document.
13161  *
13162  * Returns the new parser context or NULL
13163  */
13164 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * str)13165 xmlCreateDocParserCtxt(const xmlChar *str) {
13166     xmlParserCtxtPtr ctxt;
13167     xmlParserInputPtr input;
13168 
13169     ctxt = xmlNewParserCtxt();
13170     if (ctxt == NULL)
13171 	return(NULL);
13172 
13173     input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0);
13174     if (input == NULL) {
13175 	xmlFreeParserCtxt(ctxt);
13176 	return(NULL);
13177     }
13178     inputPush(ctxt, input);
13179 
13180     return(ctxt);
13181 }
13182 
13183 #ifdef LIBXML_SAX1_ENABLED
13184 /**
13185  * xmlSAXParseDoc:
13186  * @sax:  the SAX handler block
13187  * @cur:  a pointer to an array of xmlChar
13188  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13189  *             documents
13190  *
13191  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13192  *
13193  * parse an XML in-memory document and build a tree.
13194  * It use the given SAX function block to handle the parsing callback.
13195  * If sax is NULL, fallback to the default DOM tree building routines.
13196  *
13197  * Returns the resulting document tree
13198  */
13199 
13200 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)13201 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13202     xmlDocPtr ret;
13203     xmlParserCtxtPtr ctxt;
13204     xmlSAXHandlerPtr oldsax = NULL;
13205 
13206     if (cur == NULL) return(NULL);
13207 
13208 
13209     ctxt = xmlCreateDocParserCtxt(cur);
13210     if (ctxt == NULL) return(NULL);
13211     if (sax != NULL) {
13212         oldsax = ctxt->sax;
13213         ctxt->sax = sax;
13214         ctxt->userData = NULL;
13215     }
13216 
13217     xmlParseDocument(ctxt);
13218     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13219     else {
13220        ret = NULL;
13221        xmlFreeDoc(ctxt->myDoc);
13222        ctxt->myDoc = NULL;
13223     }
13224     if (sax != NULL)
13225 	ctxt->sax = oldsax;
13226     xmlFreeParserCtxt(ctxt);
13227 
13228     return(ret);
13229 }
13230 
13231 /**
13232  * xmlParseDoc:
13233  * @cur:  a pointer to an array of xmlChar
13234  *
13235  * DEPRECATED: Use xmlReadDoc.
13236  *
13237  * parse an XML in-memory document and build a tree.
13238  *
13239  * Returns the resulting document tree
13240  */
13241 
13242 xmlDocPtr
xmlParseDoc(const xmlChar * cur)13243 xmlParseDoc(const xmlChar *cur) {
13244     return(xmlSAXParseDoc(NULL, cur, 0));
13245 }
13246 #endif /* LIBXML_SAX1_ENABLED */
13247 
13248 /************************************************************************
13249  *									*
13250  *	New set (2.6.0) of simpler and more flexible APIs		*
13251  *									*
13252  ************************************************************************/
13253 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; if it is NULL the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement and can be used safely inside if/else. Invoke it
 * with a trailing semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
13265 
13266 /**
13267  * xmlCtxtReset:
13268  * @ctxt: an XML parser context
13269  *
13270  * Reset a parser context
13271  */
13272 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)13273 xmlCtxtReset(xmlParserCtxtPtr ctxt)
13274 {
13275     xmlParserInputPtr input;
13276     xmlDictPtr dict;
13277 
13278     if (ctxt == NULL)
13279         return;
13280 
13281     dict = ctxt->dict;
13282 
13283     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13284         xmlFreeInputStream(input);
13285     }
13286     ctxt->inputNr = 0;
13287     ctxt->input = NULL;
13288 
13289     ctxt->spaceNr = 0;
13290     if (ctxt->spaceTab != NULL) {
13291 	ctxt->spaceTab[0] = -1;
13292 	ctxt->space = &ctxt->spaceTab[0];
13293     } else {
13294         ctxt->space = NULL;
13295     }
13296 
13297 
13298     ctxt->nodeNr = 0;
13299     ctxt->node = NULL;
13300 
13301     ctxt->nameNr = 0;
13302     ctxt->name = NULL;
13303 
13304     ctxt->nsNr = 0;
13305     xmlParserNsReset(ctxt->nsdb);
13306 
13307     DICT_FREE(ctxt->version);
13308     ctxt->version = NULL;
13309     DICT_FREE(ctxt->encoding);
13310     ctxt->encoding = NULL;
13311     DICT_FREE(ctxt->extSubURI);
13312     ctxt->extSubURI = NULL;
13313     DICT_FREE(ctxt->extSubSystem);
13314     ctxt->extSubSystem = NULL;
13315     if (ctxt->myDoc != NULL)
13316         xmlFreeDoc(ctxt->myDoc);
13317     ctxt->myDoc = NULL;
13318 
13319     ctxt->standalone = -1;
13320     ctxt->hasExternalSubset = 0;
13321     ctxt->hasPErefs = 0;
13322     ctxt->html = 0;
13323     ctxt->instate = XML_PARSER_START;
13324 
13325     ctxt->wellFormed = 1;
13326     ctxt->nsWellFormed = 1;
13327     ctxt->disableSAX = 0;
13328     ctxt->valid = 1;
13329 #if 0
13330     ctxt->vctxt.userData = ctxt;
13331     ctxt->vctxt.error = xmlParserValidityError;
13332     ctxt->vctxt.warning = xmlParserValidityWarning;
13333 #endif
13334     ctxt->record_info = 0;
13335     ctxt->checkIndex = 0;
13336     ctxt->endCheckState = 0;
13337     ctxt->inSubset = 0;
13338     ctxt->errNo = XML_ERR_OK;
13339     ctxt->depth = 0;
13340     ctxt->catalogs = NULL;
13341     ctxt->sizeentities = 0;
13342     ctxt->sizeentcopy = 0;
13343     xmlInitNodeInfoSeq(&ctxt->node_seq);
13344 
13345     if (ctxt->attsDefault != NULL) {
13346         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13347         ctxt->attsDefault = NULL;
13348     }
13349     if (ctxt->attsSpecial != NULL) {
13350         xmlHashFree(ctxt->attsSpecial, NULL);
13351         ctxt->attsSpecial = NULL;
13352     }
13353 
13354 #ifdef LIBXML_CATALOG_ENABLED
13355     if (ctxt->catalogs != NULL)
13356 	xmlCatalogFreeLocal(ctxt->catalogs);
13357 #endif
13358     ctxt->nbErrors = 0;
13359     ctxt->nbWarnings = 0;
13360     if (ctxt->lastError.code != XML_ERR_OK)
13361         xmlResetError(&ctxt->lastError);
13362 }
13363 
13364 /**
13365  * xmlCtxtResetPush:
13366  * @ctxt: an XML parser context
13367  * @chunk:  a pointer to an array of chars
13368  * @size:  number of chars in the array
13369  * @filename:  an optional file name or URI
13370  * @encoding:  the document encoding, or NULL
13371  *
13372  * Reset a push parser context
13373  *
13374  * Returns 0 in case of success and 1 in case of error
13375  */
13376 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)13377 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13378                  int size, const char *filename, const char *encoding)
13379 {
13380     xmlParserInputPtr input;
13381 
13382     if (ctxt == NULL)
13383         return(1);
13384 
13385     xmlCtxtReset(ctxt);
13386 
13387     input = xmlNewInputPush(ctxt, filename, chunk, size, encoding);
13388     if (input == NULL)
13389         return(1);
13390     inputPush(ctxt, input);
13391 
13392     return(0);
13393 }
13394 
13395 static int
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt,int options,int keepMask)13396 xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13397 {
13398     int allMask;
13399 
13400     if (ctxt == NULL)
13401         return(-1);
13402 
13403     /*
13404      * XInclude options aren't handled by the parser.
13405      *
13406      * XML_PARSE_XINCLUDE
13407      * XML_PARSE_NOXINCNODE
13408      * XML_PARSE_NOBASEFIX
13409      */
13410     allMask = XML_PARSE_RECOVER |
13411               XML_PARSE_NOENT |
13412               XML_PARSE_DTDLOAD |
13413               XML_PARSE_DTDATTR |
13414               XML_PARSE_DTDVALID |
13415               XML_PARSE_NOERROR |
13416               XML_PARSE_NOWARNING |
13417               XML_PARSE_PEDANTIC |
13418               XML_PARSE_NOBLANKS |
13419 #ifdef LIBXML_SAX1_ENABLED
13420               XML_PARSE_SAX1 |
13421 #endif
13422               XML_PARSE_NONET |
13423               XML_PARSE_NODICT |
13424               XML_PARSE_NSCLEAN |
13425               XML_PARSE_NOCDATA |
13426               XML_PARSE_COMPACT |
13427               XML_PARSE_OLD10 |
13428               XML_PARSE_HUGE |
13429               XML_PARSE_OLDSAX |
13430               XML_PARSE_IGNORE_ENC |
13431               XML_PARSE_BIG_LINES |
13432               XML_PARSE_NO_XXE;
13433 
13434     ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13435 
13436     /*
13437      * For some options, struct members are historically the source
13438      * of truth. The values are initalized from global variables and
13439      * old code could also modify them directly. Several older API
13440      * functions that don't take an options argument rely on these
13441      * deprecated mechanisms.
13442      *
13443      * Once public access to struct members and the globals are
13444      * disabled, we can use the options bitmask as source of
13445      * truth, making all these struct members obsolete.
13446      *
13447      * The XML_DETECT_IDS flags is misnamed. It simply enables
13448      * loading of the external subset.
13449      */
13450     ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13451     ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13452     ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13453     ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13454     ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13455     ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13456     ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13457     ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13458 
13459     /*
13460      * Changing SAX callbacks is a bad idea. This should be fixed.
13461      */
13462     if (options & XML_PARSE_NOBLANKS) {
13463         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13464     }
13465     if (options & XML_PARSE_NOCDATA) {
13466         ctxt->sax->cdataBlock = NULL;
13467     }
13468     if (options & XML_PARSE_HUGE) {
13469         if (ctxt->dict != NULL)
13470             xmlDictSetLimit(ctxt->dict, 0);
13471     }
13472 
13473     ctxt->linenumbers = 1;
13474 
13475     return(options & ~allMask);
13476 }
13477 
13478 /**
13479  * xmlCtxtSetOptions:
13480  * @ctxt: an XML parser context
13481  * @options:  a bitmask of xmlParserOption values
13482  *
13483  * Applies the options to the parser context. Unset options are
13484  * cleared.
13485  *
13486  * Available since 2.13.0. With older versions, you can use
13487  * xmlCtxtUseOptions.
13488  *
13489  * XML_PARSE_RECOVER
13490  *
13491  * Enable "recovery" mode which allows non-wellformed documents.
13492  * How this mode behaves exactly is unspecified and may change
13493  * without further notice. Use of this feature is DISCOURAGED.
13494  *
13495  * XML_PARSE_NOENT
13496  *
13497  * Despite the confusing name, this option enables substitution
13498  * of entities. The resulting tree won't contain any entity
13499  * reference nodes.
13500  *
13501  * This option also enables loading of external entities (both
13502  * general and parameter entities) which is dangerous. If you
13503  * process untrusted data, it's recommended to set the
13504  * XML_PARSE_NO_XXE option to disable loading of external
13505  * entities.
13506  *
13507  * XML_PARSE_DTDLOAD
13508  *
13509  * Enables loading of an external DTD and the loading and
13510  * substitution of external parameter entities. Has no effect
13511  * if XML_PARSE_NO_XXE is set.
13512  *
13513  * XML_PARSE_DTDATTR
13514  *
13515  * Adds default attributes from the DTD to the result document.
13516  *
13517  * Implies XML_PARSE_DTDLOAD, but loading of external content
13518  * can be disabled with XML_PARSE_NO_XXE.
13519  *
13520  * XML_PARSE_DTDVALID
13521  *
13522  * This option enables DTD validation which requires to load
13523  * external DTDs and external entities (both general and
13524  * parameter entities) unless XML_PARSE_NO_XXE was set.
13525  *
13526  * XML_PARSE_NO_XXE
13527  *
13528  * Disables loading of external DTDs or entities.
13529  *
13530  * XML_PARSE_NOERROR
13531  *
13532  * Disable error and warning reports to the error handlers.
13533  * Errors are still accessible with xmlCtxtGetLastError.
13534  *
13535  * XML_PARSE_NOWARNING
13536  *
13537  * Disable warning reports.
13538  *
13539  * XML_PARSE_PEDANTIC
13540  *
13541  * Enable some pedantic warnings.
13542  *
13543  * XML_PARSE_NOBLANKS
13544  *
13545  * Remove some text nodes containing only whitespace from the
13546  * result document. Which nodes are removed depends on DTD
13547  * element declarations or a conservative heuristic. The
13548  * reindenting feature of the serialization code relies on this
13549  * option to be set when parsing. Use of this option is
13550  * DISCOURAGED.
13551  *
13552  * XML_PARSE_SAX1
13553  *
13554  * Always invoke the deprecated SAX1 startElement and endElement
13555  * handlers. This option is DEPRECATED.
13556  *
13557  * XML_PARSE_NONET
13558  *
13559  * Disable network access with the builtin HTTP and FTP clients.
13560  *
13561  * XML_PARSE_NODICT
13562  *
13563  * Create a document without interned strings, making all
13564  * strings separate memory allocations.
13565  *
13566  * XML_PARSE_NSCLEAN
13567  *
13568  * Remove redundant namespace declarations from the result
13569  * document.
13570  *
13571  * XML_PARSE_NOCDATA
13572  *
13573  * Output normal text nodes instead of CDATA nodes.
13574  *
13575  * XML_PARSE_COMPACT
13576  *
13577  * Store small strings directly in the node struct to save
13578  * memory.
13579  *
13580  * XML_PARSE_OLD10
13581  *
13582  * Use old Name productions from before XML 1.0 Fifth Edition.
 * This option is DEPRECATED.
13584  *
13585  * XML_PARSE_HUGE
13586  *
13587  * Relax some internal limits.
13588  *
13589  * Maximum size of text nodes, tags, comments, processing instructions,
13590  * CDATA sections, entity values
13591  *
13592  * normal: 10M
13593  * huge:    1B
13594  *
13595  * Maximum size of names, system literals, pubid literals
13596  *
13597  * normal: 50K
13598  * huge:   10M
13599  *
13600  * Maximum nesting depth of elements
13601  *
13602  * normal:  256
13603  * huge:   2048
13604  *
13605  * Maximum nesting depth of entities
13606  *
13607  * normal: 20
13608  * huge:   40
13609  *
13610  * XML_PARSE_OLDSAX
13611  *
13612  * Enable an unspecified legacy mode for SAX parsers. This
13613  * option is DEPRECATED.
13614  *
13615  * XML_PARSE_IGNORE_ENC
13616  *
13617  * Ignore the encoding in the XML declaration. This option is
13618  * mostly unneeded these days. The only effect is to enforce
13619  * UTF-8 decoding of ASCII-like data.
13620  *
13621  * XML_PARSE_BIG_LINES
13622  *
13623  * Enable reporting of line numbers larger than 65535.
13624  *
 * Returns 0 in case of success, the set of unknown or unimplemented options
 *         in case of error, or -1 if @ctxt is NULL.
13627  */
13628 int
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt,int options)13629 xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13630 {
13631     return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13632 }
13633 
13634 /**
13635  * xmlCtxtUseOptions:
13636  * @ctxt: an XML parser context
13637  * @options:  a combination of xmlParserOption
13638  *
13639  * DEPRECATED: Use xmlCtxtSetOptions.
13640  *
13641  * Applies the options to the parser context. The following options
13642  * are never cleared and can only be enabled:
13643  *
13644  * XML_PARSE_NOERROR
13645  * XML_PARSE_NOWARNING
13646  * XML_PARSE_NONET
13647  * XML_PARSE_NSCLEAN
13648  * XML_PARSE_NOCDATA
13649  * XML_PARSE_COMPACT
13650  * XML_PARSE_OLD10
13651  * XML_PARSE_HUGE
13652  * XML_PARSE_OLDSAX
13653  * XML_PARSE_IGNORE_ENC
13654  * XML_PARSE_BIG_LINES
13655  *
13656  * Returns 0 in case of success, the set of unknown or unimplemented options
13657  *         in case of error.
13658  */
13659 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)13660 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13661 {
13662     int keepMask;
13663 
13664     /*
13665      * For historic reasons, some options can only be enabled.
13666      */
13667     keepMask = XML_PARSE_NOERROR |
13668                XML_PARSE_NOWARNING |
13669                XML_PARSE_NONET |
13670                XML_PARSE_NSCLEAN |
13671                XML_PARSE_NOCDATA |
13672                XML_PARSE_COMPACT |
13673                XML_PARSE_OLD10 |
13674                XML_PARSE_HUGE |
13675                XML_PARSE_OLDSAX |
13676                XML_PARSE_IGNORE_ENC |
13677                XML_PARSE_BIG_LINES;
13678 
13679     return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13680 }
13681 
13682 /**
13683  * xmlCtxtSetMaxAmplification:
13684  * @ctxt: an XML parser context
13685  * @maxAmpl:  maximum amplification factor
13686  *
13687  * To protect against exponential entity expansion ("billion laughs"), the
13688  * size of serialized output is (roughly) limited to the input size
13689  * multiplied by this factor. The default value is 5.
13690  *
13691  * When working with documents making heavy use of entity expansion, it can
13692  * be necessary to increase the value. For security reasons, this should only
13693  * be considered when processing trusted input.
13694  */
13695 void
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt,unsigned maxAmpl)13696 xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13697 {
13698     ctxt->maxAmpl = maxAmpl;
13699 }
13700 
13701 /**
13702  * xmlCtxtParseDocument:
13703  * @ctxt:  an XML parser context
13704  * @input:  parser input
13705  *
13706  * Parse an XML document and return the resulting document tree.
13707  * Takes ownership of the input object.
13708  *
13709  * Returns the resulting document tree or NULL
13710  */
13711 xmlDocPtr
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)13712 xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13713 {
13714     xmlDocPtr ret = NULL;
13715 
13716     if ((ctxt == NULL) || (input == NULL))
13717         return(NULL);
13718 
13719     /* assert(ctxt->inputNr == 0); */
13720     while (ctxt->inputNr > 0)
13721         xmlFreeInputStream(inputPop(ctxt));
13722 
13723     if (inputPush(ctxt, input) < 0) {
13724         xmlFreeInputStream(input);
13725         return(NULL);
13726     }
13727 
13728     xmlParseDocument(ctxt);
13729 
13730     if ((ctxt->wellFormed) ||
13731         ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13732         ret = ctxt->myDoc;
13733     } else {
13734         if (ctxt->errNo == XML_ERR_OK)
13735             xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13736 
13737         ret = NULL;
13738 	xmlFreeDoc(ctxt->myDoc);
13739     }
13740     ctxt->myDoc = NULL;
13741 
13742     /* assert(ctxt->inputNr == 1); */
13743     while (ctxt->inputNr > 0)
13744         xmlFreeInputStream(inputPop(ctxt));
13745 
13746     return(ret);
13747 }
13748 
13749 /**
13750  * xmlReadDoc:
13751  * @cur:  a pointer to a zero terminated string
13752  * @URL:  base URL (optional)
13753  * @encoding:  the document encoding (optional)
13754  * @options:  a combination of xmlParserOption
13755  *
13756  * Convenience function to parse an XML document from a
13757  * zero-terminated string.
13758  *
13759  * See xmlCtxtReadDoc for details.
13760  *
13761  * Returns the resulting document tree
13762  */
13763 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)13764 xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13765            int options)
13766 {
13767     xmlParserCtxtPtr ctxt;
13768     xmlParserInputPtr input;
13769     xmlDocPtr doc;
13770 
13771     ctxt = xmlNewParserCtxt();
13772     if (ctxt == NULL)
13773         return(NULL);
13774 
13775     xmlCtxtUseOptions(ctxt, options);
13776 
13777     input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding,
13778                               XML_INPUT_BUF_STATIC);
13779 
13780     doc = xmlCtxtParseDocument(ctxt, input);
13781 
13782     xmlFreeParserCtxt(ctxt);
13783     return(doc);
13784 }
13785 
13786 /**
13787  * xmlReadFile:
13788  * @filename:  a file or URL
13789  * @encoding:  the document encoding (optional)
13790  * @options:  a combination of xmlParserOption
13791  *
13792  * Convenience function to parse an XML file from the filesystem,
13793  * the network or a global user-define resource loader.
13794  *
13795  * See xmlCtxtReadFile for details.
13796  *
13797  * Returns the resulting document tree
13798  */
13799 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)13800 xmlReadFile(const char *filename, const char *encoding, int options)
13801 {
13802     xmlParserCtxtPtr ctxt;
13803     xmlParserInputPtr input;
13804     xmlDocPtr doc;
13805 
13806     ctxt = xmlNewParserCtxt();
13807     if (ctxt == NULL)
13808         return(NULL);
13809 
13810     xmlCtxtUseOptions(ctxt, options);
13811 
13812     input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13813 
13814     doc = xmlCtxtParseDocument(ctxt, input);
13815 
13816     xmlFreeParserCtxt(ctxt);
13817     return(doc);
13818 }
13819 
13820 /**
13821  * xmlReadMemory:
13822  * @buffer:  a pointer to a char array
13823  * @size:  the size of the array
13824  * @url:  base URL (optional)
13825  * @encoding:  the document encoding (optional)
13826  * @options:  a combination of xmlParserOption
13827  *
13828  * Parse an XML in-memory document and build a tree. The input buffer must
13829  * not contain a terminating null byte.
13830  *
13831  * See xmlCtxtReadMemory for details.
13832  *
13833  * Returns the resulting document tree
13834  */
13835 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * url,const char * encoding,int options)13836 xmlReadMemory(const char *buffer, int size, const char *url,
13837               const char *encoding, int options)
13838 {
13839     xmlParserCtxtPtr ctxt;
13840     xmlParserInputPtr input;
13841     xmlDocPtr doc;
13842 
13843     if (size < 0)
13844 	return(NULL);
13845 
13846     ctxt = xmlNewParserCtxt();
13847     if (ctxt == NULL)
13848         return(NULL);
13849 
13850     xmlCtxtUseOptions(ctxt, options);
13851 
13852     input = xmlNewInputMemory(ctxt, url, buffer, size, encoding,
13853                               XML_INPUT_BUF_STATIC);
13854 
13855     doc = xmlCtxtParseDocument(ctxt, input);
13856 
13857     xmlFreeParserCtxt(ctxt);
13858     return(doc);
13859 }
13860 
13861 /**
13862  * xmlReadFd:
13863  * @fd:  an open file descriptor
13864  * @URL:  base URL (optional)
13865  * @encoding:  the document encoding (optional)
13866  * @options:  a combination of xmlParserOption
13867  *
13868  * Parse an XML from a file descriptor and build a tree.
13869  *
13870  * See xmlCtxtReadFd for details.
13871  *
13872  * NOTE that the file descriptor will not be closed when the
13873  * context is freed or reset.
13874  *
13875  * Returns the resulting document tree
13876  */
13877 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)13878 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13879 {
13880     xmlParserCtxtPtr ctxt;
13881     xmlParserInputPtr input;
13882     xmlDocPtr doc;
13883 
13884     ctxt = xmlNewParserCtxt();
13885     if (ctxt == NULL)
13886         return(NULL);
13887 
13888     xmlCtxtUseOptions(ctxt, options);
13889 
13890     input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
13891     input->buf->closecallback = NULL;
13892 
13893     doc = xmlCtxtParseDocument(ctxt, input);
13894 
13895     xmlFreeParserCtxt(ctxt);
13896     return(doc);
13897 }
13898 
13899 /**
13900  * xmlReadIO:
13901  * @ioread:  an I/O read function
13902  * @ioclose:  an I/O close function (optional)
13903  * @ioctx:  an I/O handler
13904  * @URL:  base URL (optional)
13905  * @encoding:  the document encoding (optional)
13906  * @options:  a combination of xmlParserOption
13907  *
13908  * Parse an XML document from I/O functions and context and build a tree.
13909  *
13910  * See xmlCtxtReadIO for details.
13911  *
13912  * Returns the resulting document tree
13913  */
13914 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)13915 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13916           void *ioctx, const char *URL, const char *encoding, int options)
13917 {
13918     xmlParserCtxtPtr ctxt;
13919     xmlParserInputPtr input;
13920     xmlDocPtr doc;
13921 
13922     ctxt = xmlNewParserCtxt();
13923     if (ctxt == NULL)
13924         return(NULL);
13925 
13926     xmlCtxtUseOptions(ctxt, options);
13927 
13928     input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
13929 
13930     doc = xmlCtxtParseDocument(ctxt, input);
13931 
13932     xmlFreeParserCtxt(ctxt);
13933     return(doc);
13934 }
13935 
13936 /**
13937  * xmlCtxtReadDoc:
13938  * @ctxt:  an XML parser context
13939  * @str:  a pointer to a zero terminated string
13940  * @URL:  base URL (optional)
13941  * @encoding:  the document encoding (optional)
13942  * @options:  a combination of xmlParserOption
13943  *
13944  * Parse an XML in-memory document and build a tree.
13945  *
13946  * @URL is used as base to resolve external entities and for error
13947  * reporting.
13948  *
13949  * See xmlCtxtUseOptions for details.
13950  *
13951  * Returns the resulting document tree
13952  */
13953 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * URL,const char * encoding,int options)13954 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13955                const char *URL, const char *encoding, int options)
13956 {
13957     xmlParserInputPtr input;
13958 
13959     if (ctxt == NULL)
13960         return(NULL);
13961 
13962     xmlCtxtReset(ctxt);
13963     xmlCtxtUseOptions(ctxt, options);
13964 
13965     input = xmlNewInputString(ctxt, URL, (const char *) str, encoding,
13966                               XML_INPUT_BUF_STATIC);
13967 
13968     return(xmlCtxtParseDocument(ctxt, input));
13969 }
13970 
13971 /**
13972  * xmlCtxtReadFile:
13973  * @ctxt:  an XML parser context
13974  * @filename:  a file or URL
13975  * @encoding:  the document encoding (optional)
13976  * @options:  a combination of xmlParserOption
13977  *
13978  * Parse an XML file from the filesystem, the network or a user-defined
13979  * resource loader.
13980  *
13981  * See xmlNewInputURL and xmlCtxtUseOptions for details.
13982  *
13983  * Returns the resulting document tree
13984  */
13985 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)13986 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13987                 const char *encoding, int options)
13988 {
13989     xmlParserInputPtr input;
13990 
13991     if (ctxt == NULL)
13992         return(NULL);
13993 
13994     xmlCtxtReset(ctxt);
13995     xmlCtxtUseOptions(ctxt, options);
13996 
13997     input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13998 
13999     return(xmlCtxtParseDocument(ctxt, input));
14000 }
14001 
14002 /**
14003  * xmlCtxtReadMemory:
14004  * @ctxt:  an XML parser context
14005  * @buffer:  a pointer to a char array
14006  * @size:  the size of the array
14007  * @URL:  base URL (optional)
14008  * @encoding:  the document encoding (optional)
14009  * @options:  a combination of xmlParserOption
14010  *
14011  * Parse an XML in-memory document and build a tree. The input buffer must
14012  * not contain a terminating null byte.
14013  *
14014  * @URL is used as base to resolve external entities and for error
14015  * reporting.
14016  *
14017  * See xmlCtxtUseOptions for details.
14018  *
14019  * Returns the resulting document tree
14020  */
14021 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14022 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14023                   const char *URL, const char *encoding, int options)
14024 {
14025     xmlParserInputPtr input;
14026 
14027     if ((ctxt == NULL) || (size < 0))
14028         return(NULL);
14029 
14030     xmlCtxtReset(ctxt);
14031     xmlCtxtUseOptions(ctxt, options);
14032 
14033     input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding,
14034                               XML_INPUT_BUF_STATIC);
14035 
14036     return(xmlCtxtParseDocument(ctxt, input));
14037 }
14038 
14039 /**
14040  * xmlCtxtReadFd:
14041  * @ctxt:  an XML parser context
14042  * @fd:  an open file descriptor
14043  * @URL:  base URL (optional)
14044  * @encoding:  the document encoding (optional)
14045  * @options:  a combination of xmlParserOption
14046  *
14047  * Parse an XML document from a file descriptor and build a tree.
14048  *
14049  * NOTE that the file descriptor will not be closed when the
14050  * context is freed or reset.
14051  *
14052  * @URL is used as base to resolve external entities and for error
14053  * reporting.
14054  *
14055  * See xmlCtxtUseOptions for details.
14056  *
14057  * Returns the resulting document tree
14058  */
14059 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14060 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14061               const char *URL, const char *encoding, int options)
14062 {
14063     xmlParserInputPtr input;
14064 
14065     if (ctxt == NULL)
14066         return(NULL);
14067 
14068     xmlCtxtReset(ctxt);
14069     xmlCtxtUseOptions(ctxt, options);
14070 
14071     input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14072     input->buf->closecallback = NULL;
14073 
14074     return(xmlCtxtParseDocument(ctxt, input));
14075 }
14076 
14077 /**
14078  * xmlCtxtReadIO:
14079  * @ctxt:  an XML parser context
14080  * @ioread:  an I/O read function
14081  * @ioclose:  an I/O close function
14082  * @ioctx:  an I/O handler
14083  * @URL:  the base URL to use for the document
14084  * @encoding:  the document encoding, or NULL
14085  * @options:  a combination of xmlParserOption
14086  *
14087  * parse an XML document from I/O functions and source and build a tree.
14088  * This reuses the existing @ctxt parser context
14089  *
14090  * @URL is used as base to resolve external entities and for error
14091  * reporting.
14092  *
14093  * See xmlCtxtUseOptions for details.
14094  *
14095  * Returns the resulting document tree
14096  */
14097 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14098 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14099               xmlInputCloseCallback ioclose, void *ioctx,
14100 	      const char *URL,
14101               const char *encoding, int options)
14102 {
14103     xmlParserInputPtr input;
14104 
14105     if (ctxt == NULL)
14106         return(NULL);
14107 
14108     xmlCtxtReset(ctxt);
14109     xmlCtxtUseOptions(ctxt, options);
14110 
14111     input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
14112 
14113     return(xmlCtxtParseDocument(ctxt, input));
14114 }
14115 
14116