xref: /aosp_15_r20/external/libxml2/parserInternals.c (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1 /*
2  * parserInternals.c : Internal routines (and obsolete ones) needed for the
3  *                     XML and HTML parsers.
4  *
5  * See Copyright for the status of this software.
6  *
7  * [email protected]
8  */
9 
10 #define IN_LIBXML
11 #include "libxml.h"
12 
13 #if defined(_WIN32)
14 #define XML_DIR_SEP '\\'
15 #else
16 #define XML_DIR_SEP '/'
17 #endif
18 
19 #include <string.h>
20 #include <ctype.h>
21 #include <stdlib.h>
22 
23 #include <libxml/xmlmemory.h>
24 #include <libxml/tree.h>
25 #include <libxml/parser.h>
26 #include <libxml/parserInternals.h>
27 #include <libxml/entities.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/encoding.h>
30 #include <libxml/xmlIO.h>
31 #include <libxml/uri.h>
32 #include <libxml/dict.h>
33 #include <libxml/xmlsave.h>
34 #ifdef LIBXML_CATALOG_ENABLED
35 #include <libxml/catalog.h>
36 #endif
37 #include <libxml/chvalid.h>
38 #include <libxml/nanohttp.h>
39 
40 #define CUR(ctxt) ctxt->input->cur
41 #define END(ctxt) ctxt->input->end
42 
43 #include "private/buf.h"
44 #include "private/enc.h"
45 #include "private/error.h"
46 #include "private/io.h"
47 #include "private/parser.h"
48 
49 #define XML_MAX_ERRORS 100
50 
51 /*
52  * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
53  * factor of serialized output after entity expansion.
54  */
55 #define XML_MAX_AMPLIFICATION_DEFAULT 5
56 
57 /*
58  * Various global defaults for parsing
59  */
60 
61 /**
62  * xmlCheckVersion:
63  * @version: the include version number
64  *
65  * check the compiled lib version against the include one.
66  */
67 void
xmlCheckVersion(int version)68 xmlCheckVersion(int version) {
69     int myversion = LIBXML_VERSION;
70 
71     xmlInitParser();
72 
73     if ((myversion / 10000) != (version / 10000)) {
74 	xmlPrintErrorMessage(
75 		"Fatal: program compiled against libxml %d using libxml %d\n",
76 		(version / 10000), (myversion / 10000));
77     } else if ((myversion / 100) < (version / 100)) {
78 	xmlPrintErrorMessage(
79 		"Warning: program compiled against libxml %d using older %d\n",
80 		(version / 100), (myversion / 100));
81     }
82 }
83 
84 
85 /************************************************************************
86  *									*
87  *		Some factorized error routines				*
88  *									*
89  ************************************************************************/
90 
91 
92 /**
93  * xmlCtxtSetErrorHandler:
94  * @ctxt:  an XML parser context
95  * @handler:  error handler
96  * @data:  data for error handler
97  *
98  * Register a callback function that will be called on errors and
99  * warnings. If handler is NULL, the error handler will be deactivated.
100  *
101  * This is the recommended way to collect errors from the parser and
102  * takes precedence over all other error reporting mechanisms.
103  * These are (in order of precedence):
104  *
105  * - per-context structured handler (xmlCtxtSetErrorHandler)
106  * - per-context structured "serror" SAX handler
107  * - global structured handler (xmlSetStructuredErrorFunc)
108  * - per-context generic "error" and "warning" SAX handlers
109  * - global generic handler (xmlSetGenericErrorFunc)
110  * - print to stderr
111  *
112  * Available since 2.13.0.
113  */
114 void
xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt,xmlStructuredErrorFunc handler,void * data)115 xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
116                        void *data)
117 {
118     if (ctxt == NULL)
119         return;
120     ctxt->errorHandler = handler;
121     ctxt->errorCtxt = data;
122 }
123 
124 /**
125  * xmlCtxtGetLastError:
126  * @ctx:  an XML parser context
127  *
128  * Get the last parsing error registered.
129  *
130  * Returns NULL if no error occurred or a pointer to the error
131  */
132 const xmlError *
xmlCtxtGetLastError(void * ctx)133 xmlCtxtGetLastError(void *ctx)
134 {
135     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
136 
137     if (ctxt == NULL)
138         return (NULL);
139     if (ctxt->lastError.code == XML_ERR_OK)
140         return (NULL);
141     return (&ctxt->lastError);
142 }
143 
144 /**
145  * xmlCtxtResetLastError:
146  * @ctx:  an XML parser context
147  *
148  * Cleanup the last global error registered. For parsing error
149  * this does not change the well-formedness result.
150  */
151 void
xmlCtxtResetLastError(void * ctx)152 xmlCtxtResetLastError(void *ctx)
153 {
154     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
155 
156     if (ctxt == NULL)
157         return;
158     ctxt->errNo = XML_ERR_OK;
159     if (ctxt->lastError.code == XML_ERR_OK)
160         return;
161     xmlResetError(&ctxt->lastError);
162 }
163 
164 /**
165  * xmlCtxtErrMemory:
166  * @ctxt:  an XML parser context
167  *
168  * Handle an out-of-memory error.
169  *
170  * Available since 2.13.0.
171  */
172 void
xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)173 xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
174 {
175     xmlStructuredErrorFunc schannel = NULL;
176     xmlGenericErrorFunc channel = NULL;
177     void *data;
178 
179     if (ctxt == NULL)
180         return;
181 
182     ctxt->errNo = XML_ERR_NO_MEMORY;
183     ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
184     ctxt->wellFormed = 0;
185     ctxt->disableSAX = 2;
186 
187     if (ctxt->errorHandler) {
188         schannel = ctxt->errorHandler;
189         data = ctxt->errorCtxt;
190     } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
191         (ctxt->sax->serror != NULL)) {
192         schannel = ctxt->sax->serror;
193         data = ctxt->userData;
194     } else {
195         channel = ctxt->sax->error;
196         data = ctxt->userData;
197     }
198 
199     xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
200                         &ctxt->lastError);
201 }
202 
203 /**
204  * xmlCtxtErrIO:
205  * @ctxt:  parser context
206  * @code:  xmlParserErrors code
207  * @uri:  filename or URI (optional)
208  *
209  * If filename is empty, use the one from context input if available.
210  *
211  * Report an IO error to the parser context.
212  */
213 void
xmlCtxtErrIO(xmlParserCtxtPtr ctxt,int code,const char * uri)214 xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
215 {
216     const char *errstr, *msg, *str1, *str2;
217     xmlErrorLevel level;
218 
219     if (ctxt == NULL)
220         return;
221 
222     if (((code == XML_IO_ENOENT) ||
223          (code == XML_IO_UNKNOWN))) {
224         /*
225          * Only report a warning if a file could not be found. This should
226          * only be done for external entities, but the external entity loader
227          * of xsltproc can try multiple paths and assumes that ENOENT doesn't
228          * raise an error and aborts parsing.
229          */
230         if (ctxt->validate == 0)
231             level = XML_ERR_WARNING;
232         else
233             level = XML_ERR_ERROR;
234     } else if (code == XML_IO_NETWORK_ATTEMPT) {
235         level = XML_ERR_ERROR;
236     } else {
237         level = XML_ERR_FATAL;
238     }
239 
240     errstr = xmlErrString(code);
241 
242     if (uri == NULL) {
243         msg = "%s\n";
244         str1 = errstr;
245         str2 = NULL;
246     } else {
247         msg = "failed to load \"%s\": %s\n";
248         str1 = uri;
249         str2 = errstr;
250     }
251 
252     xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
253                (const xmlChar *) uri, NULL, NULL, 0,
254                msg, str1, str2);
255 }
256 
257 static int
xmlCtxtIsCatastrophicError(xmlParserCtxtPtr ctxt)258 xmlCtxtIsCatastrophicError(xmlParserCtxtPtr ctxt) {
259     int fatal = 0;
260     int code;
261 
262     if (ctxt == NULL)
263         return(1);
264 
265     if (ctxt->lastError.level != XML_ERR_FATAL)
266         return(0);
267 
268     code = ctxt->lastError.code;
269 
270     switch (code) {
271         case XML_ERR_NO_MEMORY:
272         case XML_ERR_RESOURCE_LIMIT:
273         case XML_ERR_SYSTEM:
274         case XML_ERR_ARGUMENT:
275         case XML_ERR_INTERNAL_ERROR:
276             fatal = 1;
277             break;
278         default:
279             if ((code >= 1500) && (code <= 1599))
280                 fatal = 1;
281             break;
282     }
283 
284     return(fatal);
285 }
286 
287 /**
288  * xmlCtxtVErr:
289  * @ctxt:  a parser context
290  * @node: the current node or NULL
291  * @domain: the domain for the error
292  * @code: the code for the error
293  * @level: the xmlErrorLevel for the error
294  * @str1: extra string info
295  * @str2: extra string info
296  * @str3: extra string info
297  * @int1: extra int info
298  * @msg:  the message to display/transmit
299  * @ap:  extra parameters for the message display
300  *
301  * Raise a parser error.
302  */
303 void
xmlCtxtVErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,va_list ap)304 xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
305             xmlParserErrors code, xmlErrorLevel level,
306             const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
307             int int1, const char *msg, va_list ap)
308 {
309     xmlStructuredErrorFunc schannel = NULL;
310     xmlGenericErrorFunc channel = NULL;
311     void *data = NULL;
312     const char *file = NULL;
313     int line = 0;
314     int col = 0;
315     int res;
316 
317     if (code == XML_ERR_NO_MEMORY) {
318         xmlCtxtErrMemory(ctxt);
319         return;
320     }
321 
322     if (ctxt == NULL)
323         return;
324 
325     if (PARSER_STOPPED(ctxt))
326 	return;
327 
328     if (level == XML_ERR_WARNING) {
329         if (ctxt->nbWarnings >= XML_MAX_ERRORS)
330             goto done;
331         ctxt->nbWarnings += 1;
332     } else {
333         /* Report at least one fatal error. */
334         if ((ctxt->nbErrors >= XML_MAX_ERRORS) &&
335             ((level < XML_ERR_FATAL) || (ctxt->wellFormed == 0)))
336             goto done;
337         ctxt->nbErrors += 1;
338     }
339 
340     if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
341         ((level != XML_ERR_WARNING) ||
342          ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
343         if (ctxt->errorHandler) {
344             schannel = ctxt->errorHandler;
345             data = ctxt->errorCtxt;
346         } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
347             (ctxt->sax->serror != NULL)) {
348             schannel = ctxt->sax->serror;
349             data = ctxt->userData;
350         } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
351             if (level == XML_ERR_WARNING)
352                 channel = ctxt->vctxt.warning;
353             else
354                 channel = ctxt->vctxt.error;
355             data = ctxt->vctxt.userData;
356         } else {
357             if (level == XML_ERR_WARNING)
358                 channel = ctxt->sax->warning;
359             else
360                 channel = ctxt->sax->error;
361             data = ctxt->userData;
362         }
363     }
364 
365     if (ctxt->input != NULL) {
366         xmlParserInputPtr input = ctxt->input;
367 
368         if ((input->filename == NULL) &&
369             (ctxt->inputNr > 1)) {
370             input = ctxt->inputTab[ctxt->inputNr - 2];
371         }
372         file = input->filename;
373         line = input->line;
374         col = input->col;
375     }
376 
377     res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
378                          level, file, line, (const char *) str1,
379                          (const char *) str2, (const char *) str3, int1, col,
380                          msg, ap);
381 
382     if (res < 0) {
383         xmlCtxtErrMemory(ctxt);
384         return;
385     }
386 
387 done:
388     if (level >= XML_ERR_ERROR)
389         ctxt->errNo = code;
390     if (level == XML_ERR_FATAL) {
391         ctxt->wellFormed = 0;
392 
393         if (xmlCtxtIsCatastrophicError(ctxt))
394             ctxt->disableSAX = 2; /* stop parser */
395         else if (ctxt->recovery == 0)
396             ctxt->disableSAX = 1;
397     }
398 }
399 
400 /**
401  * xmlCtxtErr:
402  * @ctxt:  a parser context
403  * @node: the current node or NULL
404  * @domain: the domain for the error
405  * @code: the code for the error
406  * @level: the xmlErrorLevel for the error
407  * @str1: extra string info
408  * @str2: extra string info
409  * @str3: extra string info
410  * @int1: extra int info
411  * @msg:  the message to display/transmit
412  * @...:  extra parameters for the message display
413  *
414  * Raise a parser error.
415  */
416 void
xmlCtxtErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,...)417 xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
418            xmlParserErrors code, xmlErrorLevel level,
419            const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
420            int int1, const char *msg, ...)
421 {
422     va_list ap;
423 
424     va_start(ap, msg);
425     xmlCtxtVErr(ctxt, node, domain, code, level,
426                 str1, str2, str3, int1, msg, ap);
427     va_end(ap);
428 }
429 
430 /**
431  * xmlCtxtGetStatus:
432  * @ctxt:  an XML parser context
433  *
434  * Get well-formedness and validation status after parsing. Also
435  * reports catastrophic errors which are not related to parsing
436  * like out-of-memory, I/O or other errors.
437  *
438  * Available since 2.14.0.
439  *
440  * Returns a bitmask of XML_STATUS_* flags ORed together.
441  */
442 int
xmlCtxtGetStatus(xmlParserCtxt * ctxt)443 xmlCtxtGetStatus(xmlParserCtxt *ctxt) {
444     int bits = 0;
445 
446     if (xmlCtxtIsCatastrophicError(ctxt)) {
447         bits |= XML_STATUS_CATASTROPHIC_ERROR |
448                 XML_STATUS_NOT_WELL_FORMED |
449                 XML_STATUS_NOT_NS_WELL_FORMED;
450         if ((ctxt != NULL) && (ctxt->validate))
451             bits |= XML_STATUS_DTD_VALIDATION_FAILED;
452 
453         return(bits);
454     }
455 
456     if (!ctxt->wellFormed)
457         bits |= XML_STATUS_NOT_WELL_FORMED;
458     if (!ctxt->nsWellFormed)
459         bits |= XML_STATUS_NOT_NS_WELL_FORMED;
460     if ((ctxt->validate) && (!ctxt->valid))
461         bits |= XML_STATUS_DTD_VALIDATION_FAILED;
462 
463     return(bits);
464 }
465 
466 /**
467  * xmlFatalErr:
468  * @ctxt:  an XML parser context
469  * @code:  the error number
470  * @info:  extra information string
471  *
472  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
473  */
474 void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors code,const char * info)475 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
476 {
477     const char *errmsg;
478 
479     errmsg = xmlErrString(code);
480 
481     if (info == NULL) {
482         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
483                    NULL, NULL, NULL, 0, "%s\n", errmsg);
484     } else {
485         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
486                    (const xmlChar *) info, NULL, NULL, 0,
487                    "%s: %s\n", errmsg, info);
488     }
489 }
490 
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a base character or an ideographic character,
 * 0 otherwise.
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
506 
507 /************************************************************************
508  *									*
509  *		Input handling functions for progressive parsing	*
510  *									*
511  ************************************************************************/
512 
513 /* we need to keep enough input to show errors in context */
514 #define LINE_LEN        80
515 
516 /**
517  * xmlHaltParser:
518  * @ctxt:  an XML parser context
519  *
520  * Blocks further parser processing don't override error
521  * for internal use
522  */
523 void
xmlHaltParser(xmlParserCtxtPtr ctxt)524 xmlHaltParser(xmlParserCtxtPtr ctxt) {
525     if (ctxt == NULL)
526         return;
527     ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
528     ctxt->disableSAX = 2;
529 }
530 
531 /**
532  * xmlParserInputRead:
533  * @in:  an XML parser input
534  * @len:  an indicative size for the lookahead
535  *
536  * DEPRECATED: This function was internal and is deprecated.
537  *
538  * Returns -1 as this is an error to use it.
539  */
540 int
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED,int len ATTRIBUTE_UNUSED)541 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
542     return(-1);
543 }
544 
545 /**
546  * xmlParserGrow:
547  * @ctxt:  an XML parser context
548  *
549  * Grow the input buffer.
550  *
551  * Returns the number of bytes read or -1 in case of error.
552  */
553 int
xmlParserGrow(xmlParserCtxtPtr ctxt)554 xmlParserGrow(xmlParserCtxtPtr ctxt) {
555     xmlParserInputPtr in = ctxt->input;
556     xmlParserInputBufferPtr buf = in->buf;
557     size_t curEnd = in->end - in->cur;
558     size_t curBase = in->cur - in->base;
559     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
560                        XML_MAX_HUGE_LENGTH :
561                        XML_MAX_LOOKUP_LIMIT;
562     int ret;
563 
564     if (buf == NULL)
565         return(0);
566     /* Don't grow push parser buffer. */
567     if (PARSER_PROGRESSIVE(ctxt))
568         return(0);
569     /* Don't grow memory buffers. */
570     if ((buf->encoder == NULL) && (buf->readcallback == NULL))
571         return(0);
572     if (buf->error != 0)
573         return(-1);
574 
575     if (curBase > maxLength) {
576         xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
577                     "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
578         xmlHaltParser(ctxt);
579 	return(-1);
580     }
581 
582     if (curEnd >= INPUT_CHUNK)
583         return(0);
584 
585     ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
586     xmlBufUpdateInput(buf->buffer, in, curBase);
587 
588     if (ret < 0) {
589         xmlCtxtErrIO(ctxt, buf->error, NULL);
590     }
591 
592     return(ret);
593 }
594 
595 /**
596  * xmlParserInputGrow:
597  * @in:  an XML parser input
598  * @len:  an indicative size for the lookahead
599  *
600  * DEPRECATED: Don't use.
601  *
602  * This function increase the input for the parser. It tries to
603  * preserve pointers to the input buffer, and keep already read data
604  *
605  * Returns the amount of char read, or -1 in case of error, 0 indicate the
606  * end of this entity
607  */
608 int
xmlParserInputGrow(xmlParserInputPtr in,int len)609 xmlParserInputGrow(xmlParserInputPtr in, int len) {
610     int ret;
611     size_t indx;
612 
613     if ((in == NULL) || (len < 0)) return(-1);
614     if (in->buf == NULL) return(-1);
615     if (in->base == NULL) return(-1);
616     if (in->cur == NULL) return(-1);
617     if (in->buf->buffer == NULL) return(-1);
618 
619     /* Don't grow memory buffers. */
620     if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
621         return(0);
622 
623     indx = in->cur - in->base;
624     if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
625         return(0);
626     }
627     ret = xmlParserInputBufferGrow(in->buf, len);
628 
629     in->base = xmlBufContent(in->buf->buffer);
630     if (in->base == NULL) {
631         in->base = BAD_CAST "";
632         in->cur = in->base;
633         in->end = in->base;
634         return(-1);
635     }
636     in->cur = in->base + indx;
637     in->end = xmlBufEnd(in->buf->buffer);
638 
639     return(ret);
640 }
641 
642 /**
643  * xmlParserShrink:
644  * @ctxt:  an XML parser context
645  *
646  * Shrink the input buffer.
647  */
648 void
xmlParserShrink(xmlParserCtxtPtr ctxt)649 xmlParserShrink(xmlParserCtxtPtr ctxt) {
650     xmlParserInputPtr in = ctxt->input;
651     xmlParserInputBufferPtr buf = in->buf;
652     size_t used, res;
653 
654     if (buf == NULL)
655         return;
656 
657     used = in->cur - in->base;
658 
659     if (used > LINE_LEN) {
660         res = xmlBufShrink(buf->buffer, used - LINE_LEN);
661 
662         if (res > 0) {
663             used -= res;
664             if ((res > ULONG_MAX) ||
665                 (in->consumed > ULONG_MAX - (unsigned long)res))
666                 in->consumed = ULONG_MAX;
667             else
668                 in->consumed += res;
669         }
670 
671         xmlBufUpdateInput(buf->buffer, in, used);
672     }
673 }
674 
675 /**
676  * xmlParserInputShrink:
677  * @in:  an XML parser input
678  *
679  * DEPRECATED: Don't use.
680  *
681  * This function removes used input for the parser.
682  */
683 void
xmlParserInputShrink(xmlParserInputPtr in)684 xmlParserInputShrink(xmlParserInputPtr in) {
685     size_t used;
686     size_t ret;
687 
688     if (in == NULL) return;
689     if (in->buf == NULL) return;
690     if (in->base == NULL) return;
691     if (in->cur == NULL) return;
692     if (in->buf->buffer == NULL) return;
693 
694     used = in->cur - in->base;
695 
696     if (used > LINE_LEN) {
697 	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
698 	if (ret > 0) {
699             used -= ret;
700             if ((ret > ULONG_MAX) ||
701                 (in->consumed > ULONG_MAX - (unsigned long)ret))
702                 in->consumed = ULONG_MAX;
703             else
704                 in->consumed += ret;
705 	}
706 
707         xmlBufUpdateInput(in->buf->buffer, in, used);
708     }
709 }
710 
711 /************************************************************************
712  *									*
713  *		UTF8 character input and related functions		*
714  *									*
715  ************************************************************************/
716 
717 /**
718  * xmlNextChar:
719  * @ctxt:  the XML parser context
720  *
721  * DEPRECATED: Internal function, do not use.
722  *
723  * Skip to the next char input char.
724  */
725 
726 void
xmlNextChar(xmlParserCtxtPtr ctxt)727 xmlNextChar(xmlParserCtxtPtr ctxt)
728 {
729     const unsigned char *cur;
730     size_t avail;
731     int c;
732 
733     if ((ctxt == NULL) || (ctxt->input == NULL))
734         return;
735 
736     avail = ctxt->input->end - ctxt->input->cur;
737 
738     if (avail < INPUT_CHUNK) {
739         xmlParserGrow(ctxt);
740         if (ctxt->input->cur >= ctxt->input->end)
741             return;
742         avail = ctxt->input->end - ctxt->input->cur;
743     }
744 
745     cur = ctxt->input->cur;
746     c = *cur;
747 
748     if (c < 0x80) {
749         if (c == '\n') {
750             ctxt->input->cur++;
751             ctxt->input->line++;
752             ctxt->input->col = 1;
753         } else if (c == '\r') {
754             /*
755              *   2.11 End-of-Line Handling
756              *   the literal two-character sequence "#xD#xA" or a standalone
757              *   literal #xD, an XML processor must pass to the application
758              *   the single character #xA.
759              */
760             ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
761             ctxt->input->line++;
762             ctxt->input->col = 1;
763             return;
764         } else {
765             ctxt->input->cur++;
766             ctxt->input->col++;
767         }
768     } else {
769         ctxt->input->col++;
770 
771         if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
772             goto encoding_error;
773 
774         if (c < 0xe0) {
775             /* 2-byte code */
776             if (c < 0xc2)
777                 goto encoding_error;
778             ctxt->input->cur += 2;
779         } else {
780             unsigned int val = (c << 8) | cur[1];
781 
782             if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
783                 goto encoding_error;
784 
785             if (c < 0xf0) {
786                 /* 3-byte code */
787                 if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
788                     goto encoding_error;
789                 ctxt->input->cur += 3;
790             } else {
791                 if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
792                     goto encoding_error;
793 
794                 /* 4-byte code */
795                 if ((val < 0xf090) || (val >= 0xf490))
796                     goto encoding_error;
797                 ctxt->input->cur += 4;
798             }
799         }
800     }
801 
802     return;
803 
804 encoding_error:
805     /* Only report the first error */
806     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
807         xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
808         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
809     }
810     ctxt->input->cur++;
811 }
812 
813 /**
814  * xmlCurrentChar:
815  * @ctxt:  the XML parser context
816  * @len:  pointer to the length of the char read
817  *
818  * DEPRECATED: Internal function, do not use.
819  *
820  * The current char value, if using UTF-8 this may actually span multiple
821  * bytes in the input buffer. Implement the end of line normalization:
822  * 2.11 End-of-Line Handling
823  * Wherever an external parsed entity or the literal entity value
824  * of an internal parsed entity contains either the literal two-character
825  * sequence "#xD#xA" or a standalone literal #xD, an XML processor
826  * must pass to the application the single character #xA.
827  * This behavior can conveniently be produced by normalizing all
828  * line breaks to #xA on input, before parsing.)
829  *
830  * Returns the current char value and its length
831  */
832 
833 int
xmlCurrentChar(xmlParserCtxtPtr ctxt,int * len)834 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
835     const unsigned char *cur;
836     size_t avail;
837     int c;
838 
839     if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
840 
841     avail = ctxt->input->end - ctxt->input->cur;
842 
843     if (avail < INPUT_CHUNK) {
844         xmlParserGrow(ctxt);
845         avail = ctxt->input->end - ctxt->input->cur;
846     }
847 
848     cur = ctxt->input->cur;
849     c = *cur;
850 
851     if (c < 0x80) {
852 	/* 1-byte code */
853         if (c < 0x20) {
854             /*
855              *   2.11 End-of-Line Handling
856              *   the literal two-character sequence "#xD#xA" or a standalone
857              *   literal #xD, an XML processor must pass to the application
858              *   the single character #xA.
859              */
860             if (c == '\r') {
861                 /*
862                  * TODO: This function shouldn't change the 'cur' pointer
863                  * as side effect, but the NEXTL macro in parser.c relies
864                  * on this behavior when incrementing line numbers.
865                  */
866                 if (cur[1] == '\n')
867                     ctxt->input->cur++;
868                 *len = 1;
869                 c = '\n';
870             } else if (c == 0) {
871                 if (ctxt->input->cur >= ctxt->input->end) {
872                     *len = 0;
873                 } else {
874                     *len = 1;
875                     /*
876                      * TODO: Null bytes should be handled by callers,
877                      * but this can be tricky.
878                      */
879                     xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
880                             "Char 0x0 out of allowed range\n");
881                 }
882             } else {
883                 *len = 1;
884             }
885         } else {
886             *len = 1;
887         }
888 
889         return(c);
890     } else {
891         int val;
892 
893         if (avail < 2)
894             goto incomplete_sequence;
895         if ((cur[1] & 0xc0) != 0x80)
896             goto encoding_error;
897 
898         if (c < 0xe0) {
899             /* 2-byte code */
900             if (c < 0xc2)
901                 goto encoding_error;
902             val = (c & 0x1f) << 6;
903             val |= cur[1] & 0x3f;
904             *len = 2;
905         } else {
906             if (avail < 3)
907                 goto incomplete_sequence;
908             if ((cur[2] & 0xc0) != 0x80)
909                 goto encoding_error;
910 
911             if (c < 0xf0) {
912                 /* 3-byte code */
913                 val = (c & 0xf) << 12;
914                 val |= (cur[1] & 0x3f) << 6;
915                 val |= cur[2] & 0x3f;
916                 if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
917                     goto encoding_error;
918                 *len = 3;
919             } else {
920                 if (avail < 4)
921                     goto incomplete_sequence;
922                 if ((cur[3] & 0xc0) != 0x80)
923                     goto encoding_error;
924 
925                 /* 4-byte code */
926                 val = (c & 0x0f) << 18;
927                 val |= (cur[1] & 0x3f) << 12;
928                 val |= (cur[2] & 0x3f) << 6;
929                 val |= cur[3] & 0x3f;
930                 if ((val < 0x10000) || (val >= 0x110000))
931                     goto encoding_error;
932                 *len = 4;
933             }
934         }
935 
936         return(val);
937     }
938 
939 encoding_error:
940     /* Only report the first error */
941     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
942         xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
943         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
944     }
945     *len = 1;
946     return(XML_INVALID_CHAR);
947 
948 incomplete_sequence:
949     /*
950      * An encoding problem may arise from a truncated input buffer
951      * splitting a character in the middle. In that case do not raise
952      * an error but return 0. This should only happen when push parsing
953      * char data.
954      */
955     *len = 0;
956     return(0);
957 }
958 
959 /**
960  * xmlStringCurrentChar:
961  * @ctxt:  the XML parser context
962  * @cur:  pointer to the beginning of the char
963  * @len:  pointer to the length of the char read
964  *
965  * DEPRECATED: Internal function, do not use.
966  *
967  * The current char value, if using UTF-8 this may actually span multiple
968  * bytes in the input buffer.
969  *
970  * Returns the current char value and its length
971  */
972 
973 int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,const xmlChar * cur,int * len)974 xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
975                      const xmlChar *cur, int *len) {
976     int c;
977 
978     if ((cur == NULL) || (len == NULL))
979         return(0);
980 
981     /* cur is zero-terminated, so we can lie about its length. */
982     *len = 4;
983     c = xmlGetUTF8Char(cur, len);
984 
985     return((c < 0) ? 0 : c);
986 }
987 
988 /**
989  * xmlCopyCharMultiByte:
990  * @out:  pointer to an array of xmlChar
991  * @val:  the char value
992  *
993  * append the char value in the array
994  *
995  * Returns the number of xmlChar written
996  */
997 int
xmlCopyCharMultiByte(xmlChar * out,int val)998 xmlCopyCharMultiByte(xmlChar *out, int val) {
999     if ((out == NULL) || (val < 0)) return(0);
1000     /*
1001      * We are supposed to handle UTF8, check it's valid
1002      * From rfc2044: encoding of the Unicode values on UTF-8:
1003      *
1004      * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1005      * 0000 0000-0000 007F   0xxxxxxx
1006      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1007      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1008      */
1009     if  (val >= 0x80) {
1010 	xmlChar *savedout = out;
1011 	int bits;
1012 	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1013 	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1014 	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1015 	else {
1016 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1017             xmlAbort("xmlCopyCharMultiByte: codepoint out of range\n");
1018 #endif
1019 	    return(0);
1020 	}
1021 	for ( ; bits >= 0; bits-= 6)
1022 	    *out++= ((val >> bits) & 0x3F) | 0x80 ;
1023 	return (out - savedout);
1024     }
1025     *out = val;
1026     return 1;
1027 }
1028 
1029 /**
1030  * xmlCopyChar:
1031  * @len:  Ignored, compatibility
1032  * @out:  pointer to an array of xmlChar
1033  * @val:  the char value
1034  *
1035  * DEPRECATED: Don't use.
1036  *
1037  * append the char value in the array
1038  *
1039  * Returns the number of xmlChar written
1040  */
1041 
1042 int
xmlCopyChar(int len ATTRIBUTE_UNUSED,xmlChar * out,int val)1043 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1044     if ((out == NULL) || (val < 0)) return(0);
1045     /* the len parameter is ignored */
1046     if  (val >= 0x80) {
1047 	return(xmlCopyCharMultiByte (out, val));
1048     }
1049     *out = val;
1050     return 1;
1051 }
1052 
1053 /************************************************************************
1054  *									*
1055  *		Commodity functions to switch encodings			*
1056  *									*
1057  ************************************************************************/
1058 
1059 /**
1060  * xmlCtxtSetCharEncConvImpl:
1061  * @ctxt:  parser context
1062  * @impl:  callback
1063  * @vctxt:  user data
1064  *
1065  * Installs a custom implementation to convert between character
1066  * encodings.
1067  *
1068  * This bypasses legacy feature like global encoding handlers or
1069  * encoding aliases.
1070  *
1071  * Available since 2.14.0.
1072  */
1073 void
xmlCtxtSetCharEncConvImpl(xmlParserCtxtPtr ctxt,xmlCharEncConvImpl impl,void * vctxt)1074 xmlCtxtSetCharEncConvImpl(xmlParserCtxtPtr ctxt, xmlCharEncConvImpl impl,
1075                           void *vctxt) {
1076     if (ctxt == NULL)
1077         return;
1078 
1079     ctxt->convImpl = impl;
1080     ctxt->convCtxt = vctxt;
1081 }
1082 
1083 static int
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt,xmlCharEncodingHandlerPtr * hout)1084 xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
1085     xmlChar out[200];
1086     xmlParserInputPtr input = ctxt->input;
1087     xmlCharEncodingHandlerPtr handler;
1088     int inlen, outlen, res, i;
1089 
1090     *hout = NULL;
1091 
1092     /*
1093      * To detect the EBCDIC code page, we convert the first 200 bytes
1094      * to IBM037 (EBCDIC-US) and try to find the encoding declaration.
1095      */
1096     res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
1097             ctxt->convImpl, ctxt->convCtxt, &handler);
1098     if (res != 0)
1099         return(res);
1100     outlen = sizeof(out) - 1;
1101     inlen = input->end - input->cur;
1102     res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
1103     /*
1104      * Return the EBCDIC handler if decoding failed. The error will
1105      * be reported later.
1106      */
1107     if (res < 0)
1108         goto done;
1109     out[outlen] = 0;
1110 
1111     for (i = 0; i < outlen; i++) {
1112         if (out[i] == '>')
1113             break;
1114         if ((out[i] == 'e') &&
1115             (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1116             int start, cur, quote;
1117 
1118             i += 8;
1119             while (IS_BLANK_CH(out[i]))
1120                 i += 1;
1121             if (out[i++] != '=')
1122                 break;
1123             while (IS_BLANK_CH(out[i]))
1124                 i += 1;
1125             quote = out[i++];
1126             if ((quote != '\'') && (quote != '"'))
1127                 break;
1128             start = i;
1129             cur = out[i];
1130             while (((cur >= 'a') && (cur <= 'z')) ||
1131                    ((cur >= 'A') && (cur <= 'Z')) ||
1132                    ((cur >= '0') && (cur <= '9')) ||
1133                    (cur == '.') || (cur == '_') ||
1134                    (cur == '-'))
1135                 cur = out[++i];
1136             if (cur != quote)
1137                 break;
1138             out[i] = 0;
1139             xmlCharEncCloseFunc(handler);
1140             res = xmlCreateCharEncodingHandler((char *) out + start,
1141                     /* output */ 0, ctxt->convImpl, ctxt->convCtxt,
1142                     &handler);
1143             if (res != 0)
1144                 return(res);
1145             *hout = handler;
1146             return(0);
1147         }
1148     }
1149 
1150 done:
1151     /*
1152      * Encoding handlers are stateful, so we have to recreate them.
1153      */
1154     xmlCharEncCloseFunc(handler);
1155     res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
1156             ctxt->convImpl, ctxt->convCtxt, &handler);
1157     if (res != 0)
1158         return(res);
1159     *hout = handler;
1160     return(0);
1161 }
1162 
1163 /**
1164  * xmlSwitchEncoding:
1165  * @ctxt:  the parser context
1166  * @enc:  the encoding value (number)
1167  *
1168  * Use encoding specified by enum to decode input data. This overrides
1169  * the encoding found in the XML declaration.
1170  *
1171  * This function can also be used to override the encoding of chunks
1172  * passed to xmlParseChunk.
1173  *
1174  * Returns 0 in case of success, -1 otherwise
1175  */
1176 int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt,xmlCharEncoding enc)1177 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1178 {
1179     xmlCharEncodingHandlerPtr handler = NULL;
1180     int ret;
1181     int res;
1182 
1183     if ((ctxt == NULL) || (ctxt->input == NULL))
1184         return(-1);
1185 
1186     res = xmlLookupCharEncodingHandler(enc, &handler);
1187     if (res != 0) {
1188         xmlFatalErr(ctxt, res, NULL);
1189         return(-1);
1190     }
1191 
1192     ret = xmlSwitchToEncoding(ctxt, handler);
1193 
1194     if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1195         ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1196     }
1197 
1198     return(ret);
1199 }
1200 
1201 /**
1202  * xmlSwitchInputEncodingName:
1203  * @ctxt:  the parser context
1204  * @input:  the input strea,
1205  * @encoding:  the encoding name
1206  *
1207  * Returns 0 in case of success, -1 otherwise
1208  */
1209 static int
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,const char * encoding)1210 xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1211                            const char *encoding) {
1212     xmlCharEncodingHandlerPtr handler;
1213     int res;
1214 
1215     if (encoding == NULL)
1216         return(-1);
1217 
1218     res = xmlCreateCharEncodingHandler(encoding, /* output */ 0,
1219             ctxt->convImpl, ctxt->convCtxt, &handler);
1220     if (res == XML_ERR_UNSUPPORTED_ENCODING) {
1221         xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1222                       "Unsupported encoding: %s\n", BAD_CAST encoding, NULL);
1223         return(-1);
1224     } else if (res != XML_ERR_OK) {
1225         xmlFatalErr(ctxt, res, encoding);
1226         return(-1);
1227     }
1228 
1229     res  = xmlInputSetEncodingHandler(input, handler);
1230     if (res != XML_ERR_OK) {
1231         xmlCtxtErrIO(ctxt, res, NULL);
1232         return(-1);
1233     }
1234 
1235     return(0);
1236 }
1237 
1238 /**
1239  * xmlSwitchEncodingName:
1240  * @ctxt:  the parser context
1241  * @encoding:  the encoding name
1242  *
1243  * Use specified encoding to decode input data. This overrides the
1244  * encoding found in the XML declaration.
1245  *
1246  * This function can also be used to override the encoding of chunks
1247  * passed to xmlParseChunk.
1248  *
1249  * Available since 2.13.0.
1250  *
1251  * Returns 0 in case of success, -1 otherwise
1252  */
1253 int
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt,const char * encoding)1254 xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
1255     if (ctxt == NULL)
1256         return(-1);
1257 
1258     return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1259 }
1260 
1261 /**
1262  * xmlInputSetEncodingHandler:
1263  * @input:  the input stream
1264  * @handler:  the encoding handler
1265  *
1266  * Use encoding handler to decode input data.
1267  *
1268  * Closes the handler on error.
1269  *
1270  * Returns an xmlParserErrors code.
1271  */
1272 int
xmlInputSetEncodingHandler(xmlParserInputPtr input,xmlCharEncodingHandlerPtr handler)1273 xmlInputSetEncodingHandler(xmlParserInputPtr input,
1274                            xmlCharEncodingHandlerPtr handler) {
1275     xmlParserInputBufferPtr in;
1276     xmlBufPtr buf;
1277     int code = XML_ERR_OK;
1278 
1279     if ((input == NULL) || (input->buf == NULL)) {
1280         xmlCharEncCloseFunc(handler);
1281 	return(XML_ERR_ARGUMENT);
1282     }
1283     in = input->buf;
1284 
1285     input->flags |= XML_INPUT_HAS_ENCODING;
1286 
1287     /*
1288      * UTF-8 requires no encoding handler.
1289      */
1290     if ((handler != NULL) &&
1291         (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1292         xmlCharEncCloseFunc(handler);
1293         handler = NULL;
1294     }
1295 
1296     if (in->encoder == handler)
1297         return(XML_ERR_OK);
1298 
1299     if (in->encoder != NULL) {
1300         /*
1301          * Switching encodings during parsing is a really bad idea,
1302          * but Chromium can switch between ISO-8859-1 and UTF-16 before
1303          * separate calls to xmlParseChunk.
1304          *
1305          * TODO: We should check whether the "raw" input buffer is empty and
1306          * convert the old content using the old encoder.
1307          */
1308 
1309         xmlCharEncCloseFunc(in->encoder);
1310         in->encoder = handler;
1311         return(XML_ERR_OK);
1312     }
1313 
1314     buf = xmlBufCreate(XML_IO_BUFFER_SIZE);
1315     if (buf == NULL) {
1316         xmlCharEncCloseFunc(handler);
1317         return(XML_ERR_NO_MEMORY);
1318     }
1319 
1320     in->encoder = handler;
1321     in->raw = in->buffer;
1322     in->buffer = buf;
1323 
1324     /*
1325      * Is there already some content down the pipe to convert ?
1326      */
1327     if (input->end > input->base) {
1328         size_t processed;
1329         size_t nbchars;
1330         int res;
1331 
1332         /*
1333          * Shrink the current input buffer.
1334          * Move it as the raw buffer and create a new input buffer
1335          */
1336         processed = input->cur - input->base;
1337         xmlBufShrink(in->raw, processed);
1338         input->consumed += processed;
1339         in->rawconsumed = processed;
1340 
1341         nbchars = 4000 /* MINLEN */;
1342         res = xmlCharEncInput(in, &nbchars);
1343         if (res < 0)
1344             code = in->error;
1345     }
1346 
1347     xmlBufResetInput(in->buffer, input);
1348 
1349     return(code);
1350 }
1351 
1352 /**
1353  * xmlSwitchInputEncoding:
1354  * @ctxt:  the parser context, only for error reporting
1355  * @input:  the input stream
1356  * @handler:  the encoding handler
1357  *
1358  * DEPRECATED: Internal function, don't use.
1359  *
1360  * Use encoding handler to decode input data.
1361  *
1362  * Returns 0 in case of success, -1 otherwise
1363  */
1364 int
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,xmlCharEncodingHandlerPtr handler)1365 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1366                        xmlCharEncodingHandlerPtr handler) {
1367     int code = xmlInputSetEncodingHandler(input, handler);
1368 
1369     if (code != XML_ERR_OK) {
1370         xmlCtxtErrIO(ctxt, code, NULL);
1371         return(-1);
1372     }
1373 
1374     return(0);
1375 }
1376 
1377 /**
1378  * xmlSwitchToEncoding:
1379  * @ctxt:  the parser context
1380  * @handler:  the encoding handler
1381  *
1382  * Use encoding handler to decode input data.
1383  *
1384  * This function can be used to enforce the encoding of chunks passed
1385  * to xmlParseChunk.
1386  *
1387  * Returns 0 in case of success, -1 otherwise
1388  */
1389 int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt,xmlCharEncodingHandlerPtr handler)1390 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1391 {
1392     int code;
1393 
1394     if (ctxt == NULL)
1395         return(-1);
1396 
1397     code = xmlInputSetEncodingHandler(ctxt->input, handler);
1398     if (code != XML_ERR_OK) {
1399         xmlCtxtErrIO(ctxt, code, NULL);
1400         return(-1);
1401     }
1402 
1403     return(0);
1404 }
1405 
1406 /**
1407  * xmlDetectEncoding:
1408  * @ctxt:  the parser context
1409  *
1410  * Handle optional BOM, detect and switch to encoding.
1411  *
1412  * Assumes that there are at least four bytes in the input buffer.
1413  */
1414 void
xmlDetectEncoding(xmlParserCtxtPtr ctxt)1415 xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1416     const xmlChar *in;
1417     xmlCharEncoding enc;
1418     int bomSize;
1419     int autoFlag = 0;
1420 
1421     if (xmlParserGrow(ctxt) < 0)
1422         return;
1423     in = ctxt->input->cur;
1424     if (ctxt->input->end - in < 4)
1425         return;
1426 
1427     if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1428         /*
1429          * If the encoding was already set, only skip the BOM which was
1430          * possibly decoded to UTF-8.
1431          */
1432         if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1433             ctxt->input->cur += 3;
1434         }
1435 
1436         return;
1437     }
1438 
1439     enc = XML_CHAR_ENCODING_NONE;
1440     bomSize = 0;
1441 
1442     switch (in[0]) {
1443         case 0x00:
1444             if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1445                 enc = XML_CHAR_ENCODING_UCS4BE;
1446                 autoFlag = XML_INPUT_AUTO_OTHER;
1447             } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1448                 enc = XML_CHAR_ENCODING_UTF16BE;
1449                 autoFlag = XML_INPUT_AUTO_UTF16BE;
1450             }
1451             break;
1452 
1453         case 0x3C:
1454             if (in[1] == 0x00) {
1455                 if ((in[2] == 0x00) && (in[3] == 0x00)) {
1456                     enc = XML_CHAR_ENCODING_UCS4LE;
1457                     autoFlag = XML_INPUT_AUTO_OTHER;
1458                 } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1459                     enc = XML_CHAR_ENCODING_UTF16LE;
1460                     autoFlag = XML_INPUT_AUTO_UTF16LE;
1461                 }
1462             }
1463             break;
1464 
1465         case 0x4C:
1466 	    if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1467 	        enc = XML_CHAR_ENCODING_EBCDIC;
1468                 autoFlag = XML_INPUT_AUTO_OTHER;
1469             }
1470             break;
1471 
1472         case 0xEF:
1473             if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1474                 enc = XML_CHAR_ENCODING_UTF8;
1475                 autoFlag = XML_INPUT_AUTO_UTF8;
1476                 bomSize = 3;
1477             }
1478             break;
1479 
1480         case 0xFE:
1481             if (in[1] == 0xFF) {
1482                 enc = XML_CHAR_ENCODING_UTF16BE;
1483                 autoFlag = XML_INPUT_AUTO_UTF16BE;
1484                 bomSize = 2;
1485             }
1486             break;
1487 
1488         case 0xFF:
1489             if (in[1] == 0xFE) {
1490                 enc = XML_CHAR_ENCODING_UTF16LE;
1491                 autoFlag = XML_INPUT_AUTO_UTF16LE;
1492                 bomSize = 2;
1493             }
1494             break;
1495     }
1496 
1497     if (bomSize > 0) {
1498         ctxt->input->cur += bomSize;
1499     }
1500 
1501     if (enc != XML_CHAR_ENCODING_NONE) {
1502         ctxt->input->flags |= autoFlag;
1503 
1504         if (enc == XML_CHAR_ENCODING_EBCDIC) {
1505             xmlCharEncodingHandlerPtr handler;
1506             int res;
1507 
1508             res = xmlDetectEBCDIC(ctxt, &handler);
1509             if (res != XML_ERR_OK) {
1510                 xmlFatalErr(ctxt, res, "detecting EBCDIC\n");
1511             } else {
1512                 xmlSwitchToEncoding(ctxt, handler);
1513             }
1514         } else {
1515             xmlSwitchEncoding(ctxt, enc);
1516         }
1517     }
1518 }
1519 
1520 /**
1521  * xmlSetDeclaredEncoding:
1522  * @ctxt:  the parser context
1523  * @encoding:  declared encoding
1524  *
1525  * Set the encoding from a declaration in the document.
1526  *
1527  * If no encoding was set yet, switch the encoding. Otherwise, only warn
1528  * about encoding mismatches.
1529  *
1530  * Takes ownership of 'encoding'.
1531  */
1532 void
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt,xmlChar * encoding)1533 xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1534     if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1535         ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1536         xmlCharEncodingHandlerPtr handler;
1537         int res;
1538 
1539         /*
1540          * xmlSwitchEncodingName treats unsupported encodings as
1541          * warnings, but we want it to be an error in an encoding
1542          * declaration.
1543          */
1544         res = xmlCreateCharEncodingHandler((const char *) encoding,
1545                 /* output */ 0, ctxt->convImpl, ctxt->convCtxt, &handler);
1546         if (res != XML_ERR_OK) {
1547             xmlFatalErr(ctxt, res, (const char *) encoding);
1548             xmlFree(encoding);
1549             return;
1550         }
1551 
1552         res  = xmlInputSetEncodingHandler(ctxt->input, handler);
1553         if (res != XML_ERR_OK) {
1554             xmlCtxtErrIO(ctxt, res, NULL);
1555             xmlFree(encoding);
1556             return;
1557         }
1558 
1559         ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1560     } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1561         static const char *allowedUTF8[] = {
1562             "UTF-8", "UTF8", NULL
1563         };
1564         static const char *allowedUTF16LE[] = {
1565             "UTF-16", "UTF-16LE", "UTF16", NULL
1566         };
1567         static const char *allowedUTF16BE[] = {
1568             "UTF-16", "UTF-16BE", "UTF16", NULL
1569         };
1570         const char **allowed = NULL;
1571         const char *autoEnc = NULL;
1572 
1573         switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1574             case XML_INPUT_AUTO_UTF8:
1575                 allowed = allowedUTF8;
1576                 autoEnc = "UTF-8";
1577                 break;
1578             case XML_INPUT_AUTO_UTF16LE:
1579                 allowed = allowedUTF16LE;
1580                 autoEnc = "UTF-16LE";
1581                 break;
1582             case XML_INPUT_AUTO_UTF16BE:
1583                 allowed = allowedUTF16BE;
1584                 autoEnc = "UTF-16BE";
1585                 break;
1586         }
1587 
1588         if (allowed != NULL) {
1589             const char **p;
1590             int match = 0;
1591 
1592             for (p = allowed; *p != NULL; p++) {
1593                 if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1594                     match = 1;
1595                     break;
1596                 }
1597             }
1598 
1599             if (match == 0) {
1600                 xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1601                               "Encoding '%s' doesn't match "
1602                               "auto-detected '%s'\n",
1603                               encoding, BAD_CAST autoEnc);
1604                 xmlFree(encoding);
1605                 encoding = xmlStrdup(BAD_CAST autoEnc);
1606                 if (encoding == NULL)
1607                     xmlCtxtErrMemory(ctxt);
1608             }
1609         }
1610     }
1611 
1612     if (ctxt->encoding != NULL)
1613         xmlFree((xmlChar *) ctxt->encoding);
1614     ctxt->encoding = encoding;
1615 }
1616 
1617 /**
1618  * xmlCtxtGetDeclaredEncoding:
1619  * ctxt:  parser context
1620  *
1621  * Available since 2.14.0.
1622  *
1623  * Returns the encoding from the encoding declaration. This can differ
1624  * from the actual encoding.
1625  */
1626 const xmlChar *
xmlCtxtGetDeclaredEncoding(xmlParserCtxtPtr ctxt)1627 xmlCtxtGetDeclaredEncoding(xmlParserCtxtPtr ctxt) {
1628     if (ctxt == NULL)
1629         return(NULL);
1630 
1631     return(ctxt->encoding);
1632 }
1633 
1634 /**
1635  * xmlGetActualEncoding:
1636  * @ctxt:  the parser context
1637  *
1638  * Returns the actual used to parse the document. This can differ from
1639  * the declared encoding.
1640  */
1641 const xmlChar *
xmlGetActualEncoding(xmlParserCtxtPtr ctxt)1642 xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1643     const xmlChar *encoding = NULL;
1644 
1645     if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1646         (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1647         /* Preserve encoding exactly */
1648         encoding = ctxt->encoding;
1649     } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1650         encoding = BAD_CAST ctxt->input->buf->encoder->name;
1651     } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1652         encoding = BAD_CAST "UTF-8";
1653     }
1654 
1655     return(encoding);
1656 }
1657 
1658 /************************************************************************
1659  *									*
1660  *	Commodity functions to handle entities processing		*
1661  *									*
1662  ************************************************************************/
1663 
1664 /**
1665  * xmlFreeInputStream:
1666  * @input:  an xmlParserInputPtr
1667  *
1668  * Free up an input stream.
1669  */
1670 void
xmlFreeInputStream(xmlParserInputPtr input)1671 xmlFreeInputStream(xmlParserInputPtr input) {
1672     if (input == NULL) return;
1673 
1674     if (input->filename != NULL) xmlFree((char *) input->filename);
1675     if (input->version != NULL) xmlFree((char *) input->version);
1676     if ((input->free != NULL) && (input->base != NULL))
1677         input->free((xmlChar *) input->base);
1678     if (input->buf != NULL)
1679         xmlFreeParserInputBuffer(input->buf);
1680     xmlFree(input);
1681 }
1682 
1683 /**
1684  * xmlNewInputStream:
1685  * @ctxt:  an XML parser context
1686  *
1687  * DEPRECATED: Use xmlNewInputFromUrl or similar functions.
1688  *
1689  * Create a new input stream structure.
1690  *
1691  * Returns the new input stream or NULL
1692  */
1693 xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt)1694 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1695     xmlParserInputPtr input;
1696 
1697     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1698     if (input == NULL) {
1699         xmlCtxtErrMemory(ctxt);
1700 	return(NULL);
1701     }
1702     memset(input, 0, sizeof(xmlParserInput));
1703     input->line = 1;
1704     input->col = 1;
1705 
1706     return(input);
1707 }
1708 
1709 /**
1710  * xmlCtxtNewInputFromUrl:
1711  * @ctxt:  parser context
1712  * @url:  filename or URL
1713  * @publicId:  publid ID from doctype (optional)
1714  * @encoding:  character encoding (optional)
1715  * @flags:  unused, pass 0
1716  *
1717  * Creates a new parser input from the filesystem, the network or
1718  * a user-defined resource loader.
1719  *
1720  * Returns a new parser input.
1721  */
1722 xmlParserInputPtr
xmlCtxtNewInputFromUrl(xmlParserCtxtPtr ctxt,const char * url,const char * publicId,const char * encoding,int flags ATTRIBUTE_UNUSED)1723 xmlCtxtNewInputFromUrl(xmlParserCtxtPtr ctxt, const char *url,
1724                        const char *publicId, const char *encoding,
1725                        int flags ATTRIBUTE_UNUSED) {
1726     xmlParserInputPtr input;
1727 
1728     if ((ctxt == NULL) || (url == NULL))
1729 	return(NULL);
1730 
1731     input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1732     if (input == NULL)
1733         return(NULL);
1734 
1735     if (encoding != NULL)
1736         xmlSwitchInputEncodingName(ctxt, input, encoding);
1737 
1738     return(input);
1739 }
1740 
1741 /**
1742  * xmlNewInputInternal:
1743  * @buf:  parser input buffer
1744  * @filename:  filename or URL
1745  *
1746  * Internal helper function.
1747  *
1748  * Returns a new parser input.
1749  */
1750 static xmlParserInputPtr
xmlNewInputInternal(xmlParserInputBufferPtr buf,const char * filename)1751 xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1752     xmlParserInputPtr input;
1753 
1754     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1755     if (input == NULL) {
1756 	xmlFreeParserInputBuffer(buf);
1757 	return(NULL);
1758     }
1759     memset(input, 0, sizeof(xmlParserInput));
1760     input->line = 1;
1761     input->col = 1;
1762 
1763     input->buf = buf;
1764     xmlBufResetInput(input->buf->buffer, input);
1765 
1766     if (filename != NULL) {
1767         input->filename = xmlMemStrdup(filename);
1768         if (input->filename == NULL) {
1769             xmlFreeInputStream(input);
1770             return(NULL);
1771         }
1772     }
1773 
1774     return(input);
1775 }
1776 
1777 /**
1778  * xmlNewInputFromMemory:
1779  * @url:  base URL (optional)
1780  * @mem:  pointer to char array
1781  * @size:  size of array
1782  * @flags:  optimization hints
1783  *
1784  * Creates a new parser input to read from a memory area.
1785  *
1786  * @url is used as base to resolve external entities and for
1787  * error reporting.
1788  *
1789  * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1790  * stay unchanged until parsing has finished. This can avoid
1791  * temporary copies.
1792  *
1793  * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1794  * area must contain a zero byte after the buffer at position @size.
1795  * This can avoid temporary copies.
1796  *
1797  * Available since 2.14.0.
1798  *
1799  * Returns a new parser input or NULL if a memory allocation failed.
1800  */
1801 xmlParserInputPtr
xmlNewInputFromMemory(const char * url,const void * mem,size_t size,int flags)1802 xmlNewInputFromMemory(const char *url, const void *mem, size_t size,
1803                       int flags) {
1804     xmlParserInputBufferPtr buf;
1805 
1806     if (mem == NULL)
1807 	return(NULL);
1808 
1809     buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1810     if (buf == NULL)
1811         return(NULL);
1812 
1813     return(xmlNewInputInternal(buf, url));
1814 }
1815 
1816 /**
1817  * xmlCtxtNewInputFromMemory:
1818  * @ctxt:  parser context
1819  * @url:  base URL (optional)
1820  * @mem:  pointer to char array
1821  * @size:  size of array
1822  * @encoding:  character encoding (optional)
1823  * @flags:  optimization hints
1824  *
1825  * Returns a new parser input or NULL in case of error.
1826  */
1827 xmlParserInputPtr
xmlCtxtNewInputFromMemory(xmlParserCtxtPtr ctxt,const char * url,const void * mem,size_t size,const char * encoding,int flags)1828 xmlCtxtNewInputFromMemory(xmlParserCtxtPtr ctxt, const char *url,
1829                           const void *mem, size_t size,
1830                           const char *encoding, int flags) {
1831     xmlParserInputPtr input;
1832 
1833     if ((ctxt == NULL) || (mem == NULL))
1834 	return(NULL);
1835 
1836     input = xmlNewInputFromMemory(url, mem, size, flags);
1837     if (input == NULL) {
1838         xmlCtxtErrMemory(ctxt);
1839         return(NULL);
1840     }
1841 
1842     if (encoding != NULL)
1843         xmlSwitchInputEncodingName(ctxt, input, encoding);
1844 
1845     return(input);
1846 }
1847 
1848 /**
1849  * xmlNewInputFromString:
1850  * @url:  base URL (optional)
1851  * @str:  zero-terminated string
1852  * @flags:  optimization hints
1853  *
1854  * Creates a new parser input to read from a zero-terminated string.
1855  *
1856  * @url is used as base to resolve external entities and for
1857  * error reporting.
1858  *
1859  * If the XML_INPUT_BUF_STATIC flag is set, the string must
1860  * stay unchanged until parsing has finished. This can avoid
1861  * temporary copies.
1862  *
1863  * Available since 2.14.0.
1864  *
1865  * Returns a new parser input or NULL if a memory allocation failed.
1866  */
1867 xmlParserInputPtr
xmlNewInputFromString(const char * url,const char * str,int flags)1868 xmlNewInputFromString(const char *url, const char *str, int flags) {
1869     xmlParserInputBufferPtr buf;
1870 
1871     if (str == NULL)
1872 	return(NULL);
1873 
1874     buf = xmlNewInputBufferString(str, flags);
1875     if (buf == NULL)
1876         return(NULL);
1877 
1878     return(xmlNewInputInternal(buf, url));
1879 }
1880 
1881 /**
1882  * xmlCtxtNewInputFromString:
1883  * @ctxt:  parser context
1884  * @url:  base URL (optional)
1885  * @str:  zero-terminated string
1886  * @encoding:  character encoding (optional)
1887  * @flags:  optimization hints
1888  *
1889  * Returns a new parser input.
1890  */
1891 xmlParserInputPtr
xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt,const char * url,const char * str,const char * encoding,int flags)1892 xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
1893                           const char *str, const char *encoding, int flags) {
1894     xmlParserInputPtr input;
1895 
1896     if ((ctxt == NULL) || (str == NULL))
1897 	return(NULL);
1898 
1899     input = xmlNewInputFromString(url, str, flags);
1900     if (input == NULL) {
1901         xmlCtxtErrMemory(ctxt);
1902         return(NULL);
1903     }
1904 
1905     if (encoding != NULL)
1906         xmlSwitchInputEncodingName(ctxt, input, encoding);
1907 
1908     return(input);
1909 }
1910 
1911 /**
1912  * xmlNewInputFromFd:
1913  * @url:  base URL (optional)
1914  * @fd:  file descriptor
1915  * @flags:  unused, pass 0
1916  *
1917  * Creates a new parser input to read from a zero-terminated string.
1918  *
1919  * @url is used as base to resolve external entities and for
1920  * error reporting.
1921  *
1922  * @fd is closed after parsing has finished.
1923  *
1924  * Available since 2.14.0.
1925  *
1926  * Returns a new parser input or NULL if a memory allocation failed.
1927  */
1928 xmlParserInputPtr
xmlNewInputFromFd(const char * url,int fd,int flags ATTRIBUTE_UNUSED)1929 xmlNewInputFromFd(const char *url, int fd, int flags ATTRIBUTE_UNUSED) {
1930     xmlParserInputBufferPtr buf;
1931 
1932     if (fd < 0)
1933 	return(NULL);
1934 
1935     buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
1936     if (buf == NULL)
1937         return(NULL);
1938 
1939     return(xmlNewInputInternal(buf, url));
1940 }
1941 
1942 /**
1943  * xmlCtxtNewInputFromFd:
1944  * @ctxt:  parser context
1945  * @url:  base URL (optional)
1946  * @fd:  file descriptor
1947  * @encoding:  character encoding (optional)
1948  * @flags:  unused, pass 0
1949  *
1950  * Returns a new parser input.
1951  */
1952 xmlParserInputPtr
xmlCtxtNewInputFromFd(xmlParserCtxtPtr ctxt,const char * url,int fd,const char * encoding,int flags)1953 xmlCtxtNewInputFromFd(xmlParserCtxtPtr ctxt, const char *url,
1954                       int fd, const char *encoding, int flags) {
1955     xmlParserInputPtr input;
1956 
1957     if ((ctxt == NULL) || (fd < 0))
1958 	return(NULL);
1959 
1960     input = xmlNewInputFromFd(url, fd, flags);
1961     if (input == NULL) {
1962 	xmlCtxtErrMemory(ctxt);
1963         return(NULL);
1964     }
1965 
1966     if (encoding != NULL)
1967         xmlSwitchInputEncodingName(ctxt, input, encoding);
1968 
1969     return(input);
1970 }
1971 
1972 /**
1973  * xmlNewInputFromIO:
1974  * @url:  base URL (optional)
1975  * @ioRead:  read callback
1976  * @ioClose:  close callback (optional)
1977  * @ioCtxt:  IO context
1978  * @flags:  unused, pass 0
1979  *
1980  * Creates a new parser input to read from input callbacks and
1981  * cintext.
1982  *
1983  * @url is used as base to resolve external entities and for
1984  * error reporting.
1985  *
1986  * @ioRead is called to read new data into a provided buffer.
1987  * It must return the number of bytes written into the buffer
1988  * ot a negative xmlParserErrors code on failure.
1989  *
1990  * @ioClose is called after parsing has finished.
1991  *
1992  * @ioCtxt is an opaque pointer passed to the callbacks.
1993  *
1994  * Available since 2.14.0.
1995  *
1996  * Returns a new parser input or NULL if a memory allocation failed.
1997  */
1998 xmlParserInputPtr
xmlNewInputFromIO(const char * url,xmlInputReadCallback ioRead,xmlInputCloseCallback ioClose,void * ioCtxt,int flags ATTRIBUTE_UNUSED)1999 xmlNewInputFromIO(const char *url, xmlInputReadCallback ioRead,
2000                   xmlInputCloseCallback ioClose, void *ioCtxt,
2001                   int flags ATTRIBUTE_UNUSED) {
2002     xmlParserInputBufferPtr buf;
2003 
2004     if (ioRead == NULL)
2005 	return(NULL);
2006 
2007     buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2008     if (buf == NULL) {
2009         if (ioClose != NULL)
2010             ioClose(ioCtxt);
2011         return(NULL);
2012     }
2013 
2014     buf->context = ioCtxt;
2015     buf->readcallback = ioRead;
2016     buf->closecallback = ioClose;
2017 
2018     return(xmlNewInputInternal(buf, url));
2019 }
2020 
2021 /**
2022  * xmlCtxtNewInputFromIO:
2023  * @ctxt:  parser context
2024  * @url:  base URL (optional)
2025  * @ioRead:  read callback
2026  * @ioClose:  close callback (optional)
2027  * @ioCtxt:  IO context
2028  * @encoding:  character encoding (optional)
2029  * @flags:  unused, pass 0
2030  *
2031  * Returns a new parser input.
2032  */
2033 xmlParserInputPtr
xmlCtxtNewInputFromIO(xmlParserCtxtPtr ctxt,const char * url,xmlInputReadCallback ioRead,xmlInputCloseCallback ioClose,void * ioCtxt,const char * encoding,int flags)2034 xmlCtxtNewInputFromIO(xmlParserCtxtPtr ctxt, const char *url,
2035                       xmlInputReadCallback ioRead,
2036                       xmlInputCloseCallback ioClose,
2037                       void *ioCtxt, const char *encoding, int flags) {
2038     xmlParserInputPtr input;
2039 
2040     if ((ctxt == NULL) || (ioRead == NULL))
2041 	return(NULL);
2042 
2043     input = xmlNewInputFromIO(url, ioRead, ioClose, ioCtxt, flags);
2044     if (input == NULL) {
2045         xmlCtxtErrMemory(ctxt);
2046         return(NULL);
2047     }
2048 
2049     if (encoding != NULL)
2050         xmlSwitchInputEncodingName(ctxt, input, encoding);
2051 
2052     return(input);
2053 }
2054 
2055 /**
2056  * xmlNewPushInput:
2057  * @url:  base URL (optional)
2058  * @chunk:  pointer to char array
2059  * @size:  size of array
2060  *
2061  * Creates a new parser input for a push parser.
2062  *
2063  * Returns a new parser input or NULL if a memory allocation failed.
2064  */
2065 xmlParserInputPtr
xmlNewPushInput(const char * url,const char * chunk,int size)2066 xmlNewPushInput(const char *url, const char *chunk, int size) {
2067     xmlParserInputBufferPtr buf;
2068     xmlParserInputPtr input;
2069 
2070     buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2071     if (buf == NULL)
2072         return(NULL);
2073 
2074     input = xmlNewInputInternal(buf, url);
2075     if (input == NULL)
2076 	return(NULL);
2077 
2078     input->flags |= XML_INPUT_PROGRESSIVE;
2079 
2080     if ((size > 0) && (chunk != NULL)) {
2081         int res;
2082 
2083 	res = xmlParserInputBufferPush(input->buf, size, chunk);
2084         xmlBufResetInput(input->buf->buffer, input);
2085         if (res < 0) {
2086             xmlFreeInputStream(input);
2087             return(NULL);
2088         }
2089     }
2090 
2091     return(input);
2092 }
2093 
2094 /**
2095  * xmlNewIOInputStream:
2096  * @ctxt:  an XML parser context
2097  * @buf:  an input buffer
2098  * @enc:  the charset encoding if known
2099  *
2100  * Create a new input stream structure encapsulating the @input into
2101  * a stream suitable for the parser.
2102  *
2103  * Returns the new input stream or NULL
2104  */
2105 xmlParserInputPtr
xmlNewIOInputStream(xmlParserCtxtPtr ctxt,xmlParserInputBufferPtr buf,xmlCharEncoding enc)2106 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
2107 	            xmlCharEncoding enc) {
2108     xmlParserInputPtr input;
2109     const char *encoding;
2110 
2111     if ((ctxt == NULL) || (buf == NULL))
2112         return(NULL);
2113 
2114     input = xmlNewInputInternal(buf, NULL);
2115     if (input == NULL) {
2116         xmlCtxtErrMemory(ctxt);
2117 	return(NULL);
2118     }
2119 
2120     encoding = xmlGetCharEncodingName(enc);
2121     if (encoding != NULL)
2122         xmlSwitchInputEncodingName(ctxt, input, encoding);
2123 
2124     return(input);
2125 }
2126 
2127 /**
2128  * xmlNewEntityInputStream:
2129  * @ctxt:  an XML parser context
2130  * @ent:  an Entity pointer
2131  *
2132  * DEPRECATED: Internal function, do not use.
2133  *
2134  * Create a new input stream based on an xmlEntityPtr
2135  *
2136  * Returns the new input stream or NULL
2137  */
2138 xmlParserInputPtr
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)2139 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
2140     xmlParserInputPtr input;
2141 
2142     if ((ctxt == NULL) || (ent == NULL))
2143 	return(NULL);
2144 
2145     if (ent->content != NULL) {
2146         input = xmlCtxtNewInputFromString(ctxt, NULL,
2147                 (const char *) ent->content, NULL, XML_INPUT_BUF_STATIC);
2148     } else if (ent->URI != NULL) {
2149         xmlResourceType rtype;
2150 
2151         if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
2152             rtype = XML_RESOURCE_PARAMETER_ENTITY;
2153         else
2154             rtype = XML_RESOURCE_GENERAL_ENTITY;
2155 
2156         input = xmlLoadResource(ctxt, (char *) ent->URI,
2157                                 (char *) ent->ExternalID, rtype);
2158     } else {
2159         return(NULL);
2160     }
2161 
2162     if (input == NULL)
2163         return(NULL);
2164 
2165     input->entity = ent;
2166 
2167     return(input);
2168 }
2169 
2170 /**
2171  * xmlNewStringInputStream:
2172  * @ctxt:  an XML parser context
2173  * @buffer:  an memory buffer
2174  *
2175  * DEPRECATED: Use xmlNewInputFromString.
2176  *
2177  * Create a new input stream based on a memory buffer.
2178  *
2179  * Returns the new input stream
2180  */
2181 xmlParserInputPtr
xmlNewStringInputStream(xmlParserCtxtPtr ctxt,const xmlChar * buffer)2182 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2183     return(xmlCtxtNewInputFromString(ctxt, NULL, (const char *) buffer,
2184                                      NULL, 0));
2185 }
2186 
2187 
2188 /****************************************************************
2189  *								*
2190  *		External entities loading			*
2191  *								*
2192  ****************************************************************/
2193 
2194 #ifdef LIBXML_CATALOG_ENABLED
2195 
2196 /**
2197  * xmlResolveResourceFromCatalog:
2198  * @URL:  the URL for the entity to load
2199  * @ID:  the System ID for the entity to load
2200  * @ctxt:  the context in which the entity is called or NULL
2201  *
2202  * Resolves the URL and ID against the appropriate catalog.
2203  * This function is used by xmlDefaultExternalEntityLoader and
2204  * xmlNoNetExternalEntityLoader.
2205  *
2206  * Returns a new allocated URL, or NULL.
2207  */
2208 static xmlChar *
xmlResolveResourceFromCatalog(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2209 xmlResolveResourceFromCatalog(const char *URL, const char *ID,
2210                               xmlParserCtxtPtr ctxt) {
2211     xmlChar *resource = NULL;
2212     xmlCatalogAllow pref;
2213     int allowLocal = 0;
2214     int allowGlobal = 0;
2215 
2216     /*
2217      * If the resource doesn't exists as a file,
2218      * try to load it from the resource pointed in the catalogs
2219      */
2220     pref = xmlCatalogGetDefaults();
2221 
2222     if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
2223         ((pref == XML_CATA_ALLOW_ALL) ||
2224          (pref == XML_CATA_ALLOW_DOCUMENT)))
2225         allowLocal = 1;
2226 
2227     if (((ctxt == NULL) ||
2228          ((ctxt->options & XML_PARSE_NO_SYS_CATALOG) == 0)) &&
2229         ((pref == XML_CATA_ALLOW_ALL) ||
2230          (pref == XML_CATA_ALLOW_GLOBAL)))
2231         allowGlobal = 1;
2232 
2233     if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
2234 	/*
2235 	 * Do a local lookup
2236 	 */
2237         if (allowLocal) {
2238 	    resource = xmlCatalogLocalResolve(ctxt->catalogs,
2239 					      (const xmlChar *)ID,
2240 					      (const xmlChar *)URL);
2241         }
2242 	/*
2243 	 * Try a global lookup
2244 	 */
2245 	if ((resource == NULL) && (allowGlobal)) {
2246 	    resource = xmlCatalogResolve((const xmlChar *)ID,
2247 					 (const xmlChar *)URL);
2248 	}
2249 	if ((resource == NULL) && (URL != NULL))
2250 	    resource = xmlStrdup((const xmlChar *) URL);
2251 
2252 	/*
2253 	 * TODO: do an URI lookup on the reference
2254 	 */
2255 	if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) {
2256 	    xmlChar *tmp = NULL;
2257 
2258 	    if (allowLocal) {
2259 		tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
2260 	    }
2261 	    if ((tmp == NULL) && (allowGlobal)) {
2262 		tmp = xmlCatalogResolveURI(resource);
2263 	    }
2264 
2265 	    if (tmp != NULL) {
2266 		xmlFree(resource);
2267 		resource = tmp;
2268 	    }
2269 	}
2270     }
2271 
2272     return resource;
2273 }
2274 
2275 #endif
2276 
2277 #ifdef LIBXML_HTTP_ENABLED
2278 static int
xmlCheckHTTPInputInternal(xmlParserInputPtr input)2279 xmlCheckHTTPInputInternal(xmlParserInputPtr input) {
2280     const char *encoding;
2281     const char *redir;
2282     const char *mime;
2283     int code;
2284 
2285     if ((input == NULL) || (input->buf == NULL) ||
2286         (input->buf->readcallback != xmlIOHTTPRead) ||
2287         (input->buf->context == NULL))
2288         return(XML_ERR_OK);
2289 
2290     code = xmlNanoHTTPReturnCode(input->buf->context);
2291     if (code >= 400) {
2292         /* fatal error */
2293         return(XML_IO_LOAD_ERROR);
2294     }
2295 
2296     mime = xmlNanoHTTPMimeType(input->buf->context);
2297     if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
2298         (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
2299         encoding = xmlNanoHTTPEncoding(input->buf->context);
2300         if (encoding != NULL) {
2301             xmlCharEncodingHandlerPtr handler;
2302             int res;
2303 
2304             res = xmlOpenCharEncodingHandler(encoding, /* output */ 0,
2305                                              &handler);
2306             if (res == 0)
2307                 xmlInputSetEncodingHandler(input, handler);
2308         }
2309     }
2310 
2311     redir = xmlNanoHTTPRedir(input->buf->context);
2312     if (redir != NULL) {
2313         if (input->filename != NULL)
2314             xmlFree((xmlChar *) input->filename);
2315         input->filename = xmlMemStrdup(redir);
2316         if (input->filename == NULL)
2317             return(XML_ERR_NO_MEMORY);
2318     }
2319 
2320     return(XML_ERR_OK);
2321 }
2322 #endif /* LIBXML_HTTP_ENABLED */
2323 
2324 /**
2325  * xmlCheckHTTPInput:
2326  * @ctxt: an XML parser context
2327  * @ret: an XML parser input
2328  *
2329  * DEPRECATED: Internal function, don't use.
2330  *
2331  * Check an input in case it was created from an HTTP stream, in that
2332  * case it will handle encoding and update of the base URL in case of
2333  * redirection. It also checks for HTTP errors in which case the input
2334  * is cleanly freed up and an appropriate error is raised in context
2335  *
2336  * Returns the input or NULL in case of HTTP error.
2337  */
2338 xmlParserInputPtr
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr ret)2339 xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
2340     /* Avoid unused variable warning if features are disabled. */
2341     (void) ctxt;
2342 
2343 #ifdef LIBXML_HTTP_ENABLED
2344     {
2345         int code = xmlCheckHTTPInputInternal(ret);
2346 
2347         if (code != XML_ERR_OK) {
2348             if (ret->filename != NULL)
2349                 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
2350             else
2351                 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
2352             xmlFreeInputStream(ret);
2353             return(NULL);
2354         }
2355     }
2356 #endif
2357 
2358     return(ret);
2359 }
2360 
2361 /**
2362  * xmlNewInputFromUrl:
2363  * @filename:  the filename to use as entity
2364  * @flags:  XML_INPUT flags
2365  * @out:  pointer to new parser input
2366  *
2367  * Create a new input stream based on a file or a URL.
2368  *
2369  * The flag XML_INPUT_UNZIP allows decompression.
2370  *
2371  * The flag XML_INPUT_NETWORK allows network access.
2372  *
2373  * The following resource loaders will be called if they were
2374  * registered (in order of precedence):
2375  *
2376  * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2377  *   xmlParserInputBufferCreateFilenameDefault (deprecated)
2378  * - the default loader which will return
2379  *   - the result from a matching global input callback set with
2380  *     xmlRegisterInputCallbacks (deprecated)
2381  *   - a HTTP resource if support is compiled in.
2382  *   - a file opened from the filesystem, with automatic detection
2383  *     of compressed files if support is compiled in.
2384  *
2385  * Available since 2.14.0.
2386  *
2387  * Returns an xmlParserErrors code.
2388  */
2389 int
xmlNewInputFromUrl(const char * filename,int flags,xmlParserInputPtr * out)2390 xmlNewInputFromUrl(const char *filename, int flags, xmlParserInputPtr *out) {
2391     xmlParserInputBufferPtr buf;
2392     xmlParserInputPtr input;
2393     int code = XML_ERR_OK;
2394 
2395     if (out == NULL)
2396         return(XML_ERR_ARGUMENT);
2397     *out = NULL;
2398     if (filename == NULL)
2399         return(XML_ERR_ARGUMENT);
2400 
2401     if (xmlParserInputBufferCreateFilenameValue != NULL) {
2402         buf = xmlParserInputBufferCreateFilenameValue(filename,
2403                 XML_CHAR_ENCODING_NONE);
2404         if (buf == NULL)
2405             code = XML_IO_ENOENT;
2406     } else {
2407         code = xmlParserInputBufferCreateUrl(filename, XML_CHAR_ENCODING_NONE,
2408                                              flags, &buf);
2409     }
2410     if (code != XML_ERR_OK)
2411 	return(code);
2412 
2413     input = xmlNewInputInternal(buf, filename);
2414     if (input == NULL)
2415 	return(XML_ERR_NO_MEMORY);
2416 
2417 #ifdef LIBXML_HTTP_ENABLED
2418     code = xmlCheckHTTPInputInternal(input);
2419     if (code != XML_ERR_OK) {
2420         xmlFreeInputStream(input);
2421         return(code);
2422     }
2423 #endif
2424 
2425     *out = input;
2426     return(XML_ERR_OK);
2427 }
2428 
2429 /**
2430  * xmlNewInputFromFile:
2431  * @ctxt:  an XML parser context
2432  * @filename:  the filename to use as entity
2433  *
2434  * DEPRECATED: Use xmlNewInputFromUrl.
2435  *
2436  * Create a new input stream based on a file or an URL.
2437  *
2438  * Returns the new input stream or NULL in case of error
2439  */
2440 xmlParserInputPtr
xmlNewInputFromFile(xmlParserCtxtPtr ctxt,const char * filename)2441 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2442     xmlParserInputPtr input;
2443     int flags = 0;
2444     int code;
2445 
2446     if ((ctxt == NULL) || (filename == NULL))
2447         return(NULL);
2448 
2449     if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2450         flags |= XML_INPUT_UNZIP;
2451     if ((ctxt->options & XML_PARSE_NONET) == 0)
2452         flags |= XML_INPUT_NETWORK;
2453 
2454     code = xmlNewInputFromUrl(filename, flags, &input);
2455     if (code != XML_ERR_OK) {
2456         xmlCtxtErrIO(ctxt, code, filename);
2457         return(NULL);
2458     }
2459 
2460     return(input);
2461 }
2462 
2463 /**
2464  * xmlDefaultExternalEntityLoader:
2465  * @URL:  the URL for the entity to load
2466  * @ID:  the System ID for the entity to load
2467  * @ctxt:  the context in which the entity is called or NULL
2468  *
2469  * By default we don't load external entities, yet.
2470  *
2471  * Returns a new allocated xmlParserInputPtr, or NULL.
2472  */
2473 static xmlParserInputPtr
xmlDefaultExternalEntityLoader(const char * url,const char * ID,xmlParserCtxtPtr ctxt)2474 xmlDefaultExternalEntityLoader(const char *url, const char *ID,
2475                                xmlParserCtxtPtr ctxt)
2476 {
2477     xmlParserInputPtr input = NULL;
2478     char *resource = NULL;
2479 
2480     (void) ID;
2481 
2482     if (url == NULL)
2483         return(NULL);
2484 
2485 #ifdef LIBXML_CATALOG_ENABLED
2486     resource = (char *) xmlResolveResourceFromCatalog(url, ID, ctxt);
2487     if (resource != NULL)
2488 	url = resource;
2489 #endif
2490 
2491     if ((ctxt != NULL) &&
2492         (ctxt->options & XML_PARSE_NONET) &&
2493         (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2494         xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2495     } else {
2496         input = xmlNewInputFromFile(ctxt, url);
2497     }
2498 
2499     if (resource != NULL)
2500 	xmlFree(resource);
2501     return(input);
2502 }
2503 
2504 /**
2505  * xmlNoNetExternalEntityLoader:
2506  * @URL:  the URL for the entity to load
2507  * @ID:  the System ID for the entity to load
2508  * @ctxt:  the context in which the entity is called or NULL
2509  *
2510  * DEPRECATED: Use XML_PARSE_NONET.
2511  *
2512  * A specific entity loader disabling network accesses, though still
2513  * allowing local catalog accesses for resolution.
2514  *
2515  * Returns a new allocated xmlParserInputPtr, or NULL.
2516  */
2517 xmlParserInputPtr
xmlNoNetExternalEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2518 xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
2519                              xmlParserCtxtPtr ctxt) {
2520     int oldOptions = 0;
2521     xmlParserInputPtr input;
2522 
2523     if (ctxt != NULL) {
2524         oldOptions = ctxt->options;
2525         ctxt->options |= XML_PARSE_NONET;
2526     }
2527 
2528     input = xmlDefaultExternalEntityLoader(URL, ID, ctxt);
2529 
2530     if (ctxt != NULL)
2531         ctxt->options = oldOptions;
2532 
2533     return(input);
2534 }
2535 
2536 /*
2537  * This global has to die eventually
2538  */
2539 static xmlExternalEntityLoader
2540 xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2541 
2542 /**
2543  * xmlSetExternalEntityLoader:
2544  * @f:  the new entity resolver function
2545  *
2546  * DEPRECATED: This is a global setting and not thread-safe. Use
2547  * xmlCtxtSetResourceLoader or similar functions.
2548  *
2549  * Changes the default external entity resolver function for the
2550  * application.
2551  */
2552 void
xmlSetExternalEntityLoader(xmlExternalEntityLoader f)2553 xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2554     xmlCurrentExternalEntityLoader = f;
2555 }
2556 
2557 /**
2558  * xmlGetExternalEntityLoader:
2559  *
2560  * DEPRECATED: See xmlSetExternalEntityLoader.
2561  *
2562  * Get the default external entity resolver function for the application
2563  *
2564  * Returns the xmlExternalEntityLoader function pointer
2565  */
2566 xmlExternalEntityLoader
xmlGetExternalEntityLoader(void)2567 xmlGetExternalEntityLoader(void) {
2568     return(xmlCurrentExternalEntityLoader);
2569 }
2570 
2571 /**
2572  * xmlCtxtSetResourceLoader:
2573  * @ctxt:  parser context
2574  * @loader:  callback
2575  * @vctxt:  user data
2576  *
2577  * Installs a custom callback to load documents, DTDs or external
2578  * entities.
2579  *
2580  * Available since 2.14.0.
2581  */
2582 void
xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt,xmlResourceLoader loader,void * vctxt)2583 xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt, xmlResourceLoader loader,
2584                          void *vctxt) {
2585     if (ctxt == NULL)
2586         return;
2587 
2588     ctxt->resourceLoader = loader;
2589     ctxt->resourceCtxt = vctxt;
2590 }
2591 
2592 /**
2593  * xmlLoadResource:
2594  * @ctxt:  parser context
2595  * @url:  the URL for the entity to load
2596  * @publicId:  the Public ID for the entity to load
2597  * @type:  resource type
2598  *
2599  * Returns the xmlParserInputPtr or NULL in case of error.
2600  */
2601 xmlParserInputPtr
xmlLoadResource(xmlParserCtxtPtr ctxt,const char * url,const char * publicId,xmlResourceType type)2602 xmlLoadResource(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
2603                 xmlResourceType type) {
2604     char *canonicFilename;
2605     xmlParserInputPtr ret;
2606 
2607     if (url == NULL)
2608         return(NULL);
2609 
2610     if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2611         char *resource = NULL;
2612         int flags = 0;
2613         int code;
2614 
2615 #ifdef LIBXML_CATALOG_ENABLED
2616         resource = (char *) xmlResolveResourceFromCatalog(url, publicId, ctxt);
2617         if (resource != NULL)
2618             url = resource;
2619 #endif
2620 
2621         if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2622             flags |= XML_INPUT_UNZIP;
2623         if ((ctxt->options & XML_PARSE_NONET) == 0)
2624             flags |= XML_INPUT_NETWORK;
2625 
2626         code = ctxt->resourceLoader(ctxt->resourceCtxt, url, publicId, type,
2627                                     flags, &ret);
2628         if (code != XML_ERR_OK) {
2629             xmlCtxtErrIO(ctxt, code, url);
2630             ret = NULL;
2631         }
2632         if (resource != NULL)
2633             xmlFree(resource);
2634         return(ret);
2635     }
2636 
2637     canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2638     if (canonicFilename == NULL) {
2639         xmlCtxtErrMemory(ctxt);
2640         return(NULL);
2641     }
2642 
2643     ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2644     xmlFree(canonicFilename);
2645     return(ret);
2646 }
2647 
2648 /**
2649  * xmlLoadExternalEntity:
2650  * @URL:  the URL for the entity to load
2651  * @ID:  the Public ID for the entity to load
2652  * @ctxt:  the context in which the entity is called or NULL
2653  *
2654  * @URL is a filename or URL. If if contains the substring "://",
2655  * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2656  * treated as a filesystem path.
2657  *
2658  * @ID is an optional XML public ID, typically from a doctype
2659  * declaration. It is used for catalog lookups.
2660  *
2661  * If catalog lookup is enabled (default is yes) and URL or ID are
2662  * found in system or local XML catalogs, URL is replaced with the
2663  * result. Then the following resource loaders will be called if
2664  * they were registered (in order of precedence):
2665  *
2666  * - the resource loader set with xmlCtxtSetResourceLoader
2667  * - the global external entity loader set with
2668  *   xmlSetExternalEntityLoader (without catalog resolution,
2669  *   deprecated)
2670  * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2671  *   xmlParserInputBufferCreateFilenameDefault (deprecated)
2672  * - the default loader which will return
2673  *   - the result from a matching global input callback set with
2674  *     xmlRegisterInputCallbacks (deprecated)
2675  *   - a HTTP resource if support is compiled in.
2676  *   - a file opened from the filesystem, with automatic detection
2677  *     of compressed files if support is compiled in.
2678  *
2679  * Returns the xmlParserInputPtr or NULL
2680  */
2681 xmlParserInputPtr
xmlLoadExternalEntity(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2682 xmlLoadExternalEntity(const char *URL, const char *ID,
2683                       xmlParserCtxtPtr ctxt) {
2684     return(xmlLoadResource(ctxt, URL, ID, XML_RESOURCE_UNKNOWN));
2685 }
2686 
2687 /************************************************************************
2688  *									*
2689  *		Commodity functions to handle parser contexts		*
2690  *									*
2691  ************************************************************************/
2692 
2693 /**
2694  * xmlInitSAXParserCtxt:
2695  * @ctxt:  XML parser context
2696  * @sax:  SAX handlert
2697  * @userData:  user data
2698  *
2699  * Initialize a SAX parser context
2700  *
2701  * Returns 0 in case of success and -1 in case of error
2702  */
2703 
2704 static int
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt,const xmlSAXHandler * sax,void * userData)2705 xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2706                      void *userData)
2707 {
2708     xmlParserInputPtr input;
2709 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2710     size_t initialNodeTabSize = 1;
2711 #else
2712     size_t initialNodeTabSize = 10;
2713 #endif
2714 
2715     if (ctxt == NULL)
2716         return(-1);
2717 
2718     if (ctxt->dict == NULL)
2719 	ctxt->dict = xmlDictCreate();
2720     if (ctxt->dict == NULL)
2721 	return(-1);
2722 
2723     if (ctxt->sax == NULL)
2724 	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2725     if (ctxt->sax == NULL)
2726 	return(-1);
2727     if (sax == NULL) {
2728 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2729         xmlSAXVersion(ctxt->sax, 2);
2730         ctxt->userData = ctxt;
2731     } else {
2732 	if (sax->initialized == XML_SAX2_MAGIC) {
2733 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2734         } else {
2735 	    memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2736 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2737         }
2738         ctxt->userData = userData ? userData : ctxt;
2739     }
2740 
2741     ctxt->maxatts = 0;
2742     ctxt->atts = NULL;
2743     /* Allocate the Input stack */
2744     if (ctxt->inputTab == NULL) {
2745 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2746         size_t initialSize = 1;
2747 #else
2748         size_t initialSize = 5;
2749 #endif
2750 
2751 	ctxt->inputTab = xmlMalloc(initialSize * sizeof(xmlParserInputPtr));
2752 	ctxt->inputMax = initialSize;
2753     }
2754     if (ctxt->inputTab == NULL)
2755 	return(-1);
2756     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2757         xmlFreeInputStream(input);
2758     }
2759     ctxt->inputNr = 0;
2760     ctxt->input = NULL;
2761 
2762     ctxt->version = NULL;
2763     ctxt->encoding = NULL;
2764     ctxt->standalone = -1;
2765     ctxt->hasExternalSubset = 0;
2766     ctxt->hasPErefs = 0;
2767     ctxt->html = 0;
2768     ctxt->instate = XML_PARSER_START;
2769 
2770     /* Allocate the Node stack */
2771     if (ctxt->nodeTab == NULL) {
2772 	ctxt->nodeTab = xmlMalloc(initialNodeTabSize * sizeof(xmlNodePtr));
2773 	ctxt->nodeMax = initialNodeTabSize;
2774     }
2775     if (ctxt->nodeTab == NULL)
2776 	return(-1);
2777     ctxt->nodeNr = 0;
2778     ctxt->node = NULL;
2779 
2780     /* Allocate the Name stack */
2781     if (ctxt->nameTab == NULL) {
2782 	ctxt->nameTab = xmlMalloc(initialNodeTabSize * sizeof(xmlChar *));
2783 	ctxt->nameMax = initialNodeTabSize;
2784     }
2785     if (ctxt->nameTab == NULL)
2786 	return(-1);
2787     ctxt->nameNr = 0;
2788     ctxt->name = NULL;
2789 
2790     /* Allocate the space stack */
2791     if (ctxt->spaceTab == NULL) {
2792 	ctxt->spaceTab = xmlMalloc(initialNodeTabSize * sizeof(int));
2793 	ctxt->spaceMax = initialNodeTabSize;
2794     }
2795     if (ctxt->spaceTab == NULL)
2796 	return(-1);
2797     ctxt->spaceNr = 1;
2798     ctxt->spaceTab[0] = -1;
2799     ctxt->space = &ctxt->spaceTab[0];
2800     ctxt->myDoc = NULL;
2801     ctxt->wellFormed = 1;
2802     ctxt->nsWellFormed = 1;
2803     ctxt->valid = 1;
2804 
2805     ctxt->options = XML_PARSE_NODICT;
2806 
2807     /*
2808      * Initialize some parser options from deprecated global variables.
2809      * Note that the "modern" API taking options arguments or
2810      * xmlCtxtSetOptions will ignore these defaults. They're only
2811      * relevant if old API functions like xmlParseFile are used.
2812      */
2813     ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2814     if (ctxt->loadsubset) {
2815         ctxt->options |= XML_PARSE_DTDLOAD;
2816     }
2817     ctxt->validate = xmlDoValidityCheckingDefaultValue;
2818     if (ctxt->validate) {
2819         ctxt->options |= XML_PARSE_DTDVALID;
2820     }
2821     ctxt->pedantic = xmlPedanticParserDefaultValue;
2822     if (ctxt->pedantic) {
2823         ctxt->options |= XML_PARSE_PEDANTIC;
2824     }
2825     ctxt->linenumbers = xmlLineNumbersDefaultValue;
2826     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2827     if (ctxt->keepBlanks == 0) {
2828 	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2829 	ctxt->options |= XML_PARSE_NOBLANKS;
2830     }
2831     ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2832     if (ctxt->replaceEntities) {
2833         ctxt->options |= XML_PARSE_NOENT;
2834     }
2835     if (xmlGetWarningsDefaultValue == 0)
2836         ctxt->options |= XML_PARSE_NOWARNING;
2837 
2838     ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2839     ctxt->vctxt.userData = ctxt;
2840     ctxt->vctxt.error = xmlParserValidityError;
2841     ctxt->vctxt.warning = xmlParserValidityWarning;
2842 
2843     ctxt->record_info = 0;
2844     ctxt->checkIndex = 0;
2845     ctxt->inSubset = 0;
2846     ctxt->errNo = XML_ERR_OK;
2847     ctxt->depth = 0;
2848     ctxt->catalogs = NULL;
2849     ctxt->sizeentities = 0;
2850     ctxt->sizeentcopy = 0;
2851     ctxt->input_id = 1;
2852     ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2853     xmlInitNodeInfoSeq(&ctxt->node_seq);
2854 
2855     if (ctxt->nsdb == NULL) {
2856         ctxt->nsdb = xmlParserNsCreate();
2857         if (ctxt->nsdb == NULL)
2858             return(-1);
2859     }
2860 
2861     return(0);
2862 }
2863 
2864 /**
2865  * xmlInitParserCtxt:
2866  * @ctxt:  an XML parser context
2867  *
2868  * DEPRECATED: Internal function which will be made private in a future
2869  * version.
2870  *
2871  * Initialize a parser context
2872  *
2873  * Returns 0 in case of success and -1 in case of error
2874  */
2875 
2876 int
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)2877 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2878 {
2879     return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2880 }
2881 
2882 /**
2883  * xmlFreeParserCtxt:
2884  * @ctxt:  an XML parser context
2885  *
2886  * Free all the memory used by a parser context. However the parsed
2887  * document in ctxt->myDoc is not freed.
2888  */
2889 
2890 void
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)2891 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2892 {
2893     xmlParserInputPtr input;
2894 
2895     if (ctxt == NULL) return;
2896 
2897     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2898         xmlFreeInputStream(input);
2899     }
2900     if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2901     if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2902     if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2903     if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2904     if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2905     if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2906     if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2907     if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2908     if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2909 #ifdef LIBXML_SAX1_ENABLED
2910     if ((ctxt->sax != NULL) &&
2911         (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2912 #else
2913     if (ctxt->sax != NULL)
2914 #endif /* LIBXML_SAX1_ENABLED */
2915         xmlFree(ctxt->sax);
2916     if (ctxt->directory != NULL) xmlFree(ctxt->directory);
2917     if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2918     if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2919     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2920     if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2921     if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2922     if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2923     if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2924     if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2925     if (ctxt->attsDefault != NULL)
2926         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2927     if (ctxt->attsSpecial != NULL)
2928         xmlHashFree(ctxt->attsSpecial, NULL);
2929     if (ctxt->freeElems != NULL) {
2930         xmlNodePtr cur, next;
2931 
2932 	cur = ctxt->freeElems;
2933 	while (cur != NULL) {
2934 	    next = cur->next;
2935 	    xmlFree(cur);
2936 	    cur = next;
2937 	}
2938     }
2939     if (ctxt->freeAttrs != NULL) {
2940         xmlAttrPtr cur, next;
2941 
2942 	cur = ctxt->freeAttrs;
2943 	while (cur != NULL) {
2944 	    next = cur->next;
2945 	    xmlFree(cur);
2946 	    cur = next;
2947 	}
2948     }
2949     /*
2950      * cleanup the error strings
2951      */
2952     if (ctxt->lastError.message != NULL)
2953         xmlFree(ctxt->lastError.message);
2954     if (ctxt->lastError.file != NULL)
2955         xmlFree(ctxt->lastError.file);
2956     if (ctxt->lastError.str1 != NULL)
2957         xmlFree(ctxt->lastError.str1);
2958     if (ctxt->lastError.str2 != NULL)
2959         xmlFree(ctxt->lastError.str2);
2960     if (ctxt->lastError.str3 != NULL)
2961         xmlFree(ctxt->lastError.str3);
2962 
2963 #ifdef LIBXML_CATALOG_ENABLED
2964     if (ctxt->catalogs != NULL)
2965 	xmlCatalogFreeLocal(ctxt->catalogs);
2966 #endif
2967     xmlFree(ctxt);
2968 }
2969 
2970 /**
2971  * xmlNewParserCtxt:
2972  *
2973  * Allocate and initialize a new parser context.
2974  *
2975  * Returns the xmlParserCtxtPtr or NULL
2976  */
2977 
2978 xmlParserCtxtPtr
xmlNewParserCtxt(void)2979 xmlNewParserCtxt(void)
2980 {
2981     return(xmlNewSAXParserCtxt(NULL, NULL));
2982 }
2983 
2984 /**
2985  * xmlNewSAXParserCtxt:
2986  * @sax:  SAX handler
2987  * @userData:  user data
2988  *
2989  * Allocate and initialize a new SAX parser context. If userData is NULL,
2990  * the parser context will be passed as user data.
2991  *
2992  * Available since 2.11.0. If you want support older versions,
2993  * it's best to invoke xmlNewParserCtxt and set ctxt->sax with
2994  * struct assignment.
2995  *
2996  * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2997  */
2998 
2999 xmlParserCtxtPtr
xmlNewSAXParserCtxt(const xmlSAXHandler * sax,void * userData)3000 xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
3001 {
3002     xmlParserCtxtPtr ctxt;
3003 
3004     xmlInitParser();
3005 
3006     ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
3007     if (ctxt == NULL)
3008 	return(NULL);
3009     memset(ctxt, 0, sizeof(xmlParserCtxt));
3010     if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
3011         xmlFreeParserCtxt(ctxt);
3012 	return(NULL);
3013     }
3014     return(ctxt);
3015 }
3016 
3017 /**
3018  * xmlCtxtGetPrivate:
3019  * ctxt:  parser context
3020  *
3021  * Available since 2.14.0.
3022  *
3023  * Returns the private application data.
3024  */
3025 void *
xmlCtxtGetPrivate(xmlParserCtxtPtr ctxt)3026 xmlCtxtGetPrivate(xmlParserCtxtPtr ctxt) {
3027     if (ctxt == NULL)
3028         return(NULL);
3029 
3030     return(ctxt->_private);
3031 }
3032 
3033 /**
3034  * xmlCtxtSetPrivate:
3035  * ctxt:  parser context
3036  * priv:  private application data
3037  *
3038  * Available since 2.14.0.
3039  *
3040  * Set the private application data.
3041  */
3042 void
xmlCtxtSetPrivate(xmlParserCtxtPtr ctxt,void * priv)3043 xmlCtxtSetPrivate(xmlParserCtxtPtr ctxt, void *priv) {
3044     if (ctxt == NULL)
3045         return;
3046 
3047     ctxt->_private = priv;
3048 }
3049 
3050 /**
3051  * xmlCtxtGetCatalogs:
3052  * ctxt:  parser context
3053  *
3054  * Available since 2.14.0.
3055  *
3056  * Returns the local catalogs.
3057  */
3058 void *
xmlCtxtGetCatalogs(xmlParserCtxtPtr ctxt)3059 xmlCtxtGetCatalogs(xmlParserCtxtPtr ctxt) {
3060     if (ctxt == NULL)
3061         return(NULL);
3062 
3063     return(ctxt->catalogs);
3064 }
3065 
3066 /**
3067  * xmlCtxtSetCatalogs:
3068  * ctxt:  parser context
3069  * catalogs:  catalogs pointer
3070  *
3071  * Available since 2.14.0.
3072  *
3073  * Set the local catalogs.
3074  */
3075 void
xmlCtxtSetCatalogs(xmlParserCtxtPtr ctxt,void * catalogs)3076 xmlCtxtSetCatalogs(xmlParserCtxtPtr ctxt, void *catalogs) {
3077     if (ctxt == NULL)
3078         return;
3079 
3080     ctxt->catalogs = catalogs;
3081 }
3082 
3083 /**
3084  * xmlCtxtGetDict:
3085  * ctxt:  parser context
3086  *
3087  * Available since 2.14.0.
3088  *
3089  * Returns the dictionary.
3090  */
3091 xmlDictPtr
xmlCtxtGetDict(xmlParserCtxtPtr ctxt)3092 xmlCtxtGetDict(xmlParserCtxtPtr ctxt) {
3093     if (ctxt == NULL)
3094         return(NULL);
3095 
3096     return(ctxt->dict);
3097 }
3098 
3099 /**
3100  * xmlCtxtSetDict:
3101  * ctxt:  parser context
3102  * dict:  dictionary
3103  *
3104  * Available since 2.14.0.
3105  *
3106  * Set the dictionary. This should only be done immediately after
3107  * creating a parser context.
3108  */
3109 void
xmlCtxtSetDict(xmlParserCtxtPtr ctxt,xmlDictPtr dict)3110 xmlCtxtSetDict(xmlParserCtxtPtr ctxt, xmlDictPtr dict) {
3111     if (ctxt == NULL)
3112         return;
3113 
3114     if (ctxt->dict != NULL)
3115         xmlDictFree(ctxt->dict);
3116 
3117     xmlDictReference(dict);
3118     ctxt->dict = dict;
3119 }
3120 
3121 /************************************************************************
3122  *									*
3123  *		Handling of node information				*
3124  *									*
3125  ************************************************************************/
3126 
3127 /**
3128  * xmlClearParserCtxt:
3129  * @ctxt:  an XML parser context
3130  *
3131  * Clear (release owned resources) and reinitialize a parser context
3132  */
3133 
3134 void
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)3135 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
3136 {
3137   if (ctxt==NULL)
3138     return;
3139   xmlClearNodeInfoSeq(&ctxt->node_seq);
3140   xmlCtxtReset(ctxt);
3141 }
3142 
3143 
3144 /**
3145  * xmlParserFindNodeInfo:
3146  * @ctx:  an XML parser context
3147  * @node:  an XML node within the tree
3148  *
3149  * DEPRECATED: Don't use.
3150  *
3151  * Find the parser node info struct for a given node
3152  *
3153  * Returns an xmlParserNodeInfo block pointer or NULL
3154  */
3155 const xmlParserNodeInfo *
xmlParserFindNodeInfo(xmlParserCtxtPtr ctx,xmlNodePtr node)3156 xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
3157 {
3158     unsigned long pos;
3159 
3160     if ((ctx == NULL) || (node == NULL))
3161         return (NULL);
3162     /* Find position where node should be at */
3163     pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3164     if (pos < ctx->node_seq.length
3165         && ctx->node_seq.buffer[pos].node == node)
3166         return &ctx->node_seq.buffer[pos];
3167     else
3168         return NULL;
3169 }
3170 
3171 
3172 /**
3173  * xmlInitNodeInfoSeq:
3174  * @seq:  a node info sequence pointer
3175  *
3176  * DEPRECATED: Don't use.
3177  *
3178  * -- Initialize (set to initial state) node info sequence
3179  */
3180 void
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)3181 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3182 {
3183     if (seq == NULL)
3184         return;
3185     seq->length = 0;
3186     seq->maximum = 0;
3187     seq->buffer = NULL;
3188 }
3189 
3190 /**
3191  * xmlClearNodeInfoSeq:
3192  * @seq:  a node info sequence pointer
3193  *
3194  * DEPRECATED: Don't use.
3195  *
3196  * -- Clear (release memory and reinitialize) node
3197  *   info sequence
3198  */
3199 void
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)3200 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3201 {
3202     if (seq == NULL)
3203         return;
3204     if (seq->buffer != NULL)
3205         xmlFree(seq->buffer);
3206     xmlInitNodeInfoSeq(seq);
3207 }
3208 
3209 /**
3210  * xmlParserFindNodeInfoIndex:
3211  * @seq:  a node info sequence pointer
3212  * @node:  an XML node pointer
3213  *
3214  * DEPRECATED: Don't use.
3215  *
3216  * xmlParserFindNodeInfoIndex : Find the index that the info record for
3217  *   the given node is or should be at in a sorted sequence
3218  *
3219  * Returns a long indicating the position of the record
3220  */
3221 unsigned long
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,xmlNodePtr node)3222 xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
3223                            xmlNodePtr node)
3224 {
3225     unsigned long upper, lower, middle;
3226     int found = 0;
3227 
3228     if ((seq == NULL) || (node == NULL))
3229         return ((unsigned long) -1);
3230 
3231     /* Do a binary search for the key */
3232     lower = 1;
3233     upper = seq->length;
3234     middle = 0;
3235     while (lower <= upper && !found) {
3236         middle = lower + (upper - lower) / 2;
3237         if (node == seq->buffer[middle - 1].node)
3238             found = 1;
3239         else if (node < seq->buffer[middle - 1].node)
3240             upper = middle - 1;
3241         else
3242             lower = middle + 1;
3243     }
3244 
3245     /* Return position */
3246     if (middle == 0 || seq->buffer[middle - 1].node < node)
3247         return middle;
3248     else
3249         return middle - 1;
3250 }
3251 
3252 
3253 /**
3254  * xmlParserAddNodeInfo:
3255  * @ctxt:  an XML parser context
3256  * @info:  a node info sequence pointer
3257  *
3258  * DEPRECATED: Don't use.
3259  *
3260  * Insert node info record into the sorted sequence
3261  */
3262 void
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,xmlParserNodeInfoPtr info)3263 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
3264                      xmlParserNodeInfoPtr info)
3265 {
3266     unsigned long pos;
3267 
3268     if ((ctxt == NULL) || (info == NULL)) return;
3269 
3270     /* Find pos and check to see if node is already in the sequence */
3271     pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
3272                                      info->node);
3273 
3274     if ((pos < ctxt->node_seq.length) &&
3275         (ctxt->node_seq.buffer != NULL) &&
3276         (ctxt->node_seq.buffer[pos].node == info->node)) {
3277         ctxt->node_seq.buffer[pos] = *info;
3278     }
3279 
3280     /* Otherwise, we need to add new node to buffer */
3281     else {
3282         if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
3283 	    (ctxt->node_seq.buffer == NULL)) {
3284             xmlParserNodeInfo *tmp_buffer;
3285             unsigned int byte_size;
3286 
3287             if (ctxt->node_seq.maximum == 0)
3288                 ctxt->node_seq.maximum = 2;
3289             byte_size = (sizeof(*ctxt->node_seq.buffer) *
3290 			(2 * ctxt->node_seq.maximum));
3291 
3292             if (ctxt->node_seq.buffer == NULL)
3293                 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
3294             else
3295                 tmp_buffer =
3296                     (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
3297                                                      byte_size);
3298 
3299             if (tmp_buffer == NULL) {
3300 		xmlCtxtErrMemory(ctxt);
3301                 return;
3302             }
3303             ctxt->node_seq.buffer = tmp_buffer;
3304             ctxt->node_seq.maximum *= 2;
3305         }
3306 
3307         /* If position is not at end, move elements out of the way */
3308         if (pos != ctxt->node_seq.length) {
3309             unsigned long i;
3310 
3311             for (i = ctxt->node_seq.length; i > pos; i--)
3312                 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
3313         }
3314 
3315         /* Copy element and increase length */
3316         ctxt->node_seq.buffer[pos] = *info;
3317         ctxt->node_seq.length++;
3318     }
3319 }
3320 
3321 /************************************************************************
3322  *									*
3323  *		Defaults settings					*
3324  *									*
3325  ************************************************************************/
3326 /**
3327  * xmlPedanticParserDefault:
3328  * @val:  int 0 or 1
3329  *
3330  * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
3331  *
3332  * Set and return the previous value for enabling pedantic warnings.
3333  *
3334  * Returns the last value for 0 for no substitution, 1 for substitution.
3335  */
3336 
3337 int
xmlPedanticParserDefault(int val)3338 xmlPedanticParserDefault(int val) {
3339     int old = xmlPedanticParserDefaultValue;
3340 
3341     xmlPedanticParserDefaultValue = val;
3342     return(old);
3343 }
3344 
3345 /**
3346  * xmlLineNumbersDefault:
3347  * @val:  int 0 or 1
3348  *
3349  * DEPRECATED: The modern options API always enables line numbers.
3350  *
3351  * Set and return the previous value for enabling line numbers in elements
3352  * contents. This may break on old application and is turned off by default.
3353  *
3354  * Returns the last value for 0 for no substitution, 1 for substitution.
3355  */
3356 
3357 int
xmlLineNumbersDefault(int val)3358 xmlLineNumbersDefault(int val) {
3359     int old = xmlLineNumbersDefaultValue;
3360 
3361     xmlLineNumbersDefaultValue = val;
3362     return(old);
3363 }
3364 
3365 /**
3366  * xmlSubstituteEntitiesDefault:
3367  * @val:  int 0 or 1
3368  *
3369  * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
3370  *
3371  * Set and return the previous value for default entity support.
3372  * Initially the parser always keep entity references instead of substituting
3373  * entity values in the output. This function has to be used to change the
3374  * default parser behavior
3375  * SAX::substituteEntities() has to be used for changing that on a file by
3376  * file basis.
3377  *
3378  * Returns the last value for 0 for no substitution, 1 for substitution.
3379  */
3380 
3381 int
xmlSubstituteEntitiesDefault(int val)3382 xmlSubstituteEntitiesDefault(int val) {
3383     int old = xmlSubstituteEntitiesDefaultValue;
3384 
3385     xmlSubstituteEntitiesDefaultValue = val;
3386     return(old);
3387 }
3388 
3389 /**
3390  * xmlKeepBlanksDefault:
3391  * @val:  int 0 or 1
3392  *
3393  * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
3394  *
3395  * Set and return the previous value for default blanks text nodes support.
3396  * The 1.x version of the parser used an heuristic to try to detect
3397  * ignorable white spaces. As a result the SAX callback was generating
3398  * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
3399  * using the DOM output text nodes containing those blanks were not generated.
3400  * The 2.x and later version will switch to the XML standard way and
3401  * ignorableWhitespace() are only generated when running the parser in
3402  * validating mode and when the current element doesn't allow CDATA or
3403  * mixed content.
3404  * This function is provided as a way to force the standard behavior
3405  * on 1.X libs and to switch back to the old mode for compatibility when
3406  * running 1.X client code on 2.X . Upgrade of 1.X code should be done
3407  * by using xmlIsBlankNode() commodity function to detect the "empty"
3408  * nodes generated.
3409  * This value also affect autogeneration of indentation when saving code
3410  * if blanks sections are kept, indentation is not generated.
3411  *
3412  * Returns the last value for 0 for no substitution, 1 for substitution.
3413  */
3414 
3415 int
xmlKeepBlanksDefault(int val)3416 xmlKeepBlanksDefault(int val) {
3417     int old = xmlKeepBlanksDefaultValue;
3418 
3419     xmlKeepBlanksDefaultValue = val;
3420 #ifdef LIBXML_OUTPUT_ENABLED
3421     if (!val)
3422         xmlIndentTreeOutput = 1;
3423 #endif
3424     return(old);
3425 }
3426 
3427