1 /*
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * [email protected]
8 */
9
10 #define IN_LIBXML
11 #include "libxml.h"
12
13 #if defined(_WIN32)
14 #define XML_DIR_SEP '\\'
15 #else
16 #define XML_DIR_SEP '/'
17 #endif
18
19 #include <string.h>
20 #include <ctype.h>
21 #include <stdlib.h>
22
23 #include <libxml/xmlmemory.h>
24 #include <libxml/tree.h>
25 #include <libxml/parser.h>
26 #include <libxml/parserInternals.h>
27 #include <libxml/entities.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/encoding.h>
30 #include <libxml/xmlIO.h>
31 #include <libxml/uri.h>
32 #include <libxml/dict.h>
33 #include <libxml/xmlsave.h>
34 #ifdef LIBXML_CATALOG_ENABLED
35 #include <libxml/catalog.h>
36 #endif
37 #include <libxml/chvalid.h>
38 #include <libxml/nanohttp.h>
39
40 #define CUR(ctxt) ctxt->input->cur
41 #define END(ctxt) ctxt->input->end
42
43 #include "private/buf.h"
44 #include "private/enc.h"
45 #include "private/error.h"
46 #include "private/io.h"
47 #include "private/parser.h"
48
49 #define XML_MAX_ERRORS 100
50
51 /*
52 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
53 * factor of serialized output after entity expansion.
54 */
55 #define XML_MAX_AMPLIFICATION_DEFAULT 5
56
57 /*
58 * Various global defaults for parsing
59 */
60
61 /**
62 * xmlCheckVersion:
63 * @version: the include version number
64 *
65 * check the compiled lib version against the include one.
66 */
67 void
xmlCheckVersion(int version)68 xmlCheckVersion(int version) {
69 int myversion = LIBXML_VERSION;
70
71 xmlInitParser();
72
73 if ((myversion / 10000) != (version / 10000)) {
74 xmlPrintErrorMessage(
75 "Fatal: program compiled against libxml %d using libxml %d\n",
76 (version / 10000), (myversion / 10000));
77 } else if ((myversion / 100) < (version / 100)) {
78 xmlPrintErrorMessage(
79 "Warning: program compiled against libxml %d using older %d\n",
80 (version / 100), (myversion / 100));
81 }
82 }
83
84
85 /************************************************************************
86 * *
87 * Some factorized error routines *
88 * *
89 ************************************************************************/
90
91
92 /**
93 * xmlCtxtSetErrorHandler:
94 * @ctxt: an XML parser context
95 * @handler: error handler
96 * @data: data for error handler
97 *
98 * Register a callback function that will be called on errors and
99 * warnings. If handler is NULL, the error handler will be deactivated.
100 *
101 * This is the recommended way to collect errors from the parser and
102 * takes precedence over all other error reporting mechanisms.
103 * These are (in order of precedence):
104 *
105 * - per-context structured handler (xmlCtxtSetErrorHandler)
106 * - per-context structured "serror" SAX handler
107 * - global structured handler (xmlSetStructuredErrorFunc)
108 * - per-context generic "error" and "warning" SAX handlers
109 * - global generic handler (xmlSetGenericErrorFunc)
110 * - print to stderr
111 *
112 * Available since 2.13.0.
113 */
114 void
xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt,xmlStructuredErrorFunc handler,void * data)115 xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
116 void *data)
117 {
118 if (ctxt == NULL)
119 return;
120 ctxt->errorHandler = handler;
121 ctxt->errorCtxt = data;
122 }
123
124 /**
125 * xmlCtxtGetLastError:
126 * @ctx: an XML parser context
127 *
128 * Get the last parsing error registered.
129 *
130 * Returns NULL if no error occurred or a pointer to the error
131 */
132 const xmlError *
xmlCtxtGetLastError(void * ctx)133 xmlCtxtGetLastError(void *ctx)
134 {
135 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
136
137 if (ctxt == NULL)
138 return (NULL);
139 if (ctxt->lastError.code == XML_ERR_OK)
140 return (NULL);
141 return (&ctxt->lastError);
142 }
143
144 /**
145 * xmlCtxtResetLastError:
146 * @ctx: an XML parser context
147 *
148 * Cleanup the last global error registered. For parsing error
149 * this does not change the well-formedness result.
150 */
151 void
xmlCtxtResetLastError(void * ctx)152 xmlCtxtResetLastError(void *ctx)
153 {
154 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
155
156 if (ctxt == NULL)
157 return;
158 ctxt->errNo = XML_ERR_OK;
159 if (ctxt->lastError.code == XML_ERR_OK)
160 return;
161 xmlResetError(&ctxt->lastError);
162 }
163
164 /**
165 * xmlCtxtErrMemory:
166 * @ctxt: an XML parser context
167 *
168 * Handle an out-of-memory error.
169 *
170 * Available since 2.13.0.
171 */
172 void
xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)173 xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
174 {
175 xmlStructuredErrorFunc schannel = NULL;
176 xmlGenericErrorFunc channel = NULL;
177 void *data;
178
179 if (ctxt == NULL)
180 return;
181
182 ctxt->errNo = XML_ERR_NO_MEMORY;
183 ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
184 ctxt->wellFormed = 0;
185 ctxt->disableSAX = 2;
186
187 if (ctxt->errorHandler) {
188 schannel = ctxt->errorHandler;
189 data = ctxt->errorCtxt;
190 } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
191 (ctxt->sax->serror != NULL)) {
192 schannel = ctxt->sax->serror;
193 data = ctxt->userData;
194 } else {
195 channel = ctxt->sax->error;
196 data = ctxt->userData;
197 }
198
199 xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
200 &ctxt->lastError);
201 }
202
203 /**
204 * xmlCtxtErrIO:
205 * @ctxt: parser context
206 * @code: xmlParserErrors code
207 * @uri: filename or URI (optional)
208 *
209 * If filename is empty, use the one from context input if available.
210 *
211 * Report an IO error to the parser context.
212 */
213 void
xmlCtxtErrIO(xmlParserCtxtPtr ctxt,int code,const char * uri)214 xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
215 {
216 const char *errstr, *msg, *str1, *str2;
217 xmlErrorLevel level;
218
219 if (ctxt == NULL)
220 return;
221
222 if (((code == XML_IO_ENOENT) ||
223 (code == XML_IO_UNKNOWN))) {
224 /*
225 * Only report a warning if a file could not be found. This should
226 * only be done for external entities, but the external entity loader
227 * of xsltproc can try multiple paths and assumes that ENOENT doesn't
228 * raise an error and aborts parsing.
229 */
230 if (ctxt->validate == 0)
231 level = XML_ERR_WARNING;
232 else
233 level = XML_ERR_ERROR;
234 } else if (code == XML_IO_NETWORK_ATTEMPT) {
235 level = XML_ERR_ERROR;
236 } else {
237 level = XML_ERR_FATAL;
238 }
239
240 errstr = xmlErrString(code);
241
242 if (uri == NULL) {
243 msg = "%s\n";
244 str1 = errstr;
245 str2 = NULL;
246 } else {
247 msg = "failed to load \"%s\": %s\n";
248 str1 = uri;
249 str2 = errstr;
250 }
251
252 xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
253 (const xmlChar *) uri, NULL, NULL, 0,
254 msg, str1, str2);
255 }
256
257 static int
xmlCtxtIsCatastrophicError(xmlParserCtxtPtr ctxt)258 xmlCtxtIsCatastrophicError(xmlParserCtxtPtr ctxt) {
259 int fatal = 0;
260 int code;
261
262 if (ctxt == NULL)
263 return(1);
264
265 if (ctxt->lastError.level != XML_ERR_FATAL)
266 return(0);
267
268 code = ctxt->lastError.code;
269
270 switch (code) {
271 case XML_ERR_NO_MEMORY:
272 case XML_ERR_RESOURCE_LIMIT:
273 case XML_ERR_SYSTEM:
274 case XML_ERR_ARGUMENT:
275 case XML_ERR_INTERNAL_ERROR:
276 fatal = 1;
277 break;
278 default:
279 if ((code >= 1500) && (code <= 1599))
280 fatal = 1;
281 break;
282 }
283
284 return(fatal);
285 }
286
287 /**
288 * xmlCtxtVErr:
289 * @ctxt: a parser context
290 * @node: the current node or NULL
291 * @domain: the domain for the error
292 * @code: the code for the error
293 * @level: the xmlErrorLevel for the error
294 * @str1: extra string info
295 * @str2: extra string info
296 * @str3: extra string info
297 * @int1: extra int info
298 * @msg: the message to display/transmit
299 * @ap: extra parameters for the message display
300 *
301 * Raise a parser error.
302 */
303 void
xmlCtxtVErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,va_list ap)304 xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
305 xmlParserErrors code, xmlErrorLevel level,
306 const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
307 int int1, const char *msg, va_list ap)
308 {
309 xmlStructuredErrorFunc schannel = NULL;
310 xmlGenericErrorFunc channel = NULL;
311 void *data = NULL;
312 const char *file = NULL;
313 int line = 0;
314 int col = 0;
315 int res;
316
317 if (code == XML_ERR_NO_MEMORY) {
318 xmlCtxtErrMemory(ctxt);
319 return;
320 }
321
322 if (ctxt == NULL)
323 return;
324
325 if (PARSER_STOPPED(ctxt))
326 return;
327
328 if (level == XML_ERR_WARNING) {
329 if (ctxt->nbWarnings >= XML_MAX_ERRORS)
330 goto done;
331 ctxt->nbWarnings += 1;
332 } else {
333 /* Report at least one fatal error. */
334 if ((ctxt->nbErrors >= XML_MAX_ERRORS) &&
335 ((level < XML_ERR_FATAL) || (ctxt->wellFormed == 0)))
336 goto done;
337 ctxt->nbErrors += 1;
338 }
339
340 if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
341 ((level != XML_ERR_WARNING) ||
342 ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
343 if (ctxt->errorHandler) {
344 schannel = ctxt->errorHandler;
345 data = ctxt->errorCtxt;
346 } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
347 (ctxt->sax->serror != NULL)) {
348 schannel = ctxt->sax->serror;
349 data = ctxt->userData;
350 } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
351 if (level == XML_ERR_WARNING)
352 channel = ctxt->vctxt.warning;
353 else
354 channel = ctxt->vctxt.error;
355 data = ctxt->vctxt.userData;
356 } else {
357 if (level == XML_ERR_WARNING)
358 channel = ctxt->sax->warning;
359 else
360 channel = ctxt->sax->error;
361 data = ctxt->userData;
362 }
363 }
364
365 if (ctxt->input != NULL) {
366 xmlParserInputPtr input = ctxt->input;
367
368 if ((input->filename == NULL) &&
369 (ctxt->inputNr > 1)) {
370 input = ctxt->inputTab[ctxt->inputNr - 2];
371 }
372 file = input->filename;
373 line = input->line;
374 col = input->col;
375 }
376
377 res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
378 level, file, line, (const char *) str1,
379 (const char *) str2, (const char *) str3, int1, col,
380 msg, ap);
381
382 if (res < 0) {
383 xmlCtxtErrMemory(ctxt);
384 return;
385 }
386
387 done:
388 if (level >= XML_ERR_ERROR)
389 ctxt->errNo = code;
390 if (level == XML_ERR_FATAL) {
391 ctxt->wellFormed = 0;
392
393 if (xmlCtxtIsCatastrophicError(ctxt))
394 ctxt->disableSAX = 2; /* stop parser */
395 else if (ctxt->recovery == 0)
396 ctxt->disableSAX = 1;
397 }
398 }
399
400 /**
401 * xmlCtxtErr:
402 * @ctxt: a parser context
403 * @node: the current node or NULL
404 * @domain: the domain for the error
405 * @code: the code for the error
406 * @level: the xmlErrorLevel for the error
407 * @str1: extra string info
408 * @str2: extra string info
409 * @str3: extra string info
410 * @int1: extra int info
411 * @msg: the message to display/transmit
412 * @...: extra parameters for the message display
413 *
414 * Raise a parser error.
415 */
416 void
xmlCtxtErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,...)417 xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
418 xmlParserErrors code, xmlErrorLevel level,
419 const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
420 int int1, const char *msg, ...)
421 {
422 va_list ap;
423
424 va_start(ap, msg);
425 xmlCtxtVErr(ctxt, node, domain, code, level,
426 str1, str2, str3, int1, msg, ap);
427 va_end(ap);
428 }
429
430 /**
431 * xmlCtxtGetStatus:
432 * @ctxt: an XML parser context
433 *
434 * Get well-formedness and validation status after parsing. Also
435 * reports catastrophic errors which are not related to parsing
436 * like out-of-memory, I/O or other errors.
437 *
438 * Available since 2.14.0.
439 *
440 * Returns a bitmask of XML_STATUS_* flags ORed together.
441 */
442 int
xmlCtxtGetStatus(xmlParserCtxt * ctxt)443 xmlCtxtGetStatus(xmlParserCtxt *ctxt) {
444 int bits = 0;
445
446 if (xmlCtxtIsCatastrophicError(ctxt)) {
447 bits |= XML_STATUS_CATASTROPHIC_ERROR |
448 XML_STATUS_NOT_WELL_FORMED |
449 XML_STATUS_NOT_NS_WELL_FORMED;
450 if ((ctxt != NULL) && (ctxt->validate))
451 bits |= XML_STATUS_DTD_VALIDATION_FAILED;
452
453 return(bits);
454 }
455
456 if (!ctxt->wellFormed)
457 bits |= XML_STATUS_NOT_WELL_FORMED;
458 if (!ctxt->nsWellFormed)
459 bits |= XML_STATUS_NOT_NS_WELL_FORMED;
460 if ((ctxt->validate) && (!ctxt->valid))
461 bits |= XML_STATUS_DTD_VALIDATION_FAILED;
462
463 return(bits);
464 }
465
466 /**
467 * xmlFatalErr:
468 * @ctxt: an XML parser context
469 * @code: the error number
470 * @info: extra information string
471 *
472 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
473 */
474 void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors code,const char * info)475 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
476 {
477 const char *errmsg;
478
479 errmsg = xmlErrString(code);
480
481 if (info == NULL) {
482 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
483 NULL, NULL, NULL, 0, "%s\n", errmsg);
484 } else {
485 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
486 (const xmlChar *) info, NULL, NULL, 0,
487 "%s: %s\n", errmsg, info);
488 }
489 }
490
/**
 * xmlIsLetter:
 * @c: a Unicode character (int)
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);

    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
506
507 /************************************************************************
508 * *
509 * Input handling functions for progressive parsing *
510 * *
511 ************************************************************************/
512
513 /* we need to keep enough input to show errors in context */
514 #define LINE_LEN 80
515
516 /**
517 * xmlHaltParser:
518 * @ctxt: an XML parser context
519 *
520 * Blocks further parser processing don't override error
521 * for internal use
522 */
523 void
xmlHaltParser(xmlParserCtxtPtr ctxt)524 xmlHaltParser(xmlParserCtxtPtr ctxt) {
525 if (ctxt == NULL)
526 return;
527 ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
528 ctxt->disableSAX = 2;
529 }
530
531 /**
532 * xmlParserInputRead:
533 * @in: an XML parser input
534 * @len: an indicative size for the lookahead
535 *
536 * DEPRECATED: This function was internal and is deprecated.
537 *
538 * Returns -1 as this is an error to use it.
539 */
540 int
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED,int len ATTRIBUTE_UNUSED)541 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
542 return(-1);
543 }
544
545 /**
546 * xmlParserGrow:
547 * @ctxt: an XML parser context
548 *
549 * Grow the input buffer.
550 *
551 * Returns the number of bytes read or -1 in case of error.
552 */
553 int
xmlParserGrow(xmlParserCtxtPtr ctxt)554 xmlParserGrow(xmlParserCtxtPtr ctxt) {
555 xmlParserInputPtr in = ctxt->input;
556 xmlParserInputBufferPtr buf = in->buf;
557 size_t curEnd = in->end - in->cur;
558 size_t curBase = in->cur - in->base;
559 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
560 XML_MAX_HUGE_LENGTH :
561 XML_MAX_LOOKUP_LIMIT;
562 int ret;
563
564 if (buf == NULL)
565 return(0);
566 /* Don't grow push parser buffer. */
567 if (PARSER_PROGRESSIVE(ctxt))
568 return(0);
569 /* Don't grow memory buffers. */
570 if ((buf->encoder == NULL) && (buf->readcallback == NULL))
571 return(0);
572 if (buf->error != 0)
573 return(-1);
574
575 if (curBase > maxLength) {
576 xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
577 "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
578 xmlHaltParser(ctxt);
579 return(-1);
580 }
581
582 if (curEnd >= INPUT_CHUNK)
583 return(0);
584
585 ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
586 xmlBufUpdateInput(buf->buffer, in, curBase);
587
588 if (ret < 0) {
589 xmlCtxtErrIO(ctxt, buf->error, NULL);
590 }
591
592 return(ret);
593 }
594
595 /**
596 * xmlParserInputGrow:
597 * @in: an XML parser input
598 * @len: an indicative size for the lookahead
599 *
600 * DEPRECATED: Don't use.
601 *
602 * This function increase the input for the parser. It tries to
603 * preserve pointers to the input buffer, and keep already read data
604 *
605 * Returns the amount of char read, or -1 in case of error, 0 indicate the
606 * end of this entity
607 */
608 int
xmlParserInputGrow(xmlParserInputPtr in,int len)609 xmlParserInputGrow(xmlParserInputPtr in, int len) {
610 int ret;
611 size_t indx;
612
613 if ((in == NULL) || (len < 0)) return(-1);
614 if (in->buf == NULL) return(-1);
615 if (in->base == NULL) return(-1);
616 if (in->cur == NULL) return(-1);
617 if (in->buf->buffer == NULL) return(-1);
618
619 /* Don't grow memory buffers. */
620 if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
621 return(0);
622
623 indx = in->cur - in->base;
624 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
625 return(0);
626 }
627 ret = xmlParserInputBufferGrow(in->buf, len);
628
629 in->base = xmlBufContent(in->buf->buffer);
630 if (in->base == NULL) {
631 in->base = BAD_CAST "";
632 in->cur = in->base;
633 in->end = in->base;
634 return(-1);
635 }
636 in->cur = in->base + indx;
637 in->end = xmlBufEnd(in->buf->buffer);
638
639 return(ret);
640 }
641
642 /**
643 * xmlParserShrink:
644 * @ctxt: an XML parser context
645 *
646 * Shrink the input buffer.
647 */
648 void
xmlParserShrink(xmlParserCtxtPtr ctxt)649 xmlParserShrink(xmlParserCtxtPtr ctxt) {
650 xmlParserInputPtr in = ctxt->input;
651 xmlParserInputBufferPtr buf = in->buf;
652 size_t used, res;
653
654 if (buf == NULL)
655 return;
656
657 used = in->cur - in->base;
658
659 if (used > LINE_LEN) {
660 res = xmlBufShrink(buf->buffer, used - LINE_LEN);
661
662 if (res > 0) {
663 used -= res;
664 if ((res > ULONG_MAX) ||
665 (in->consumed > ULONG_MAX - (unsigned long)res))
666 in->consumed = ULONG_MAX;
667 else
668 in->consumed += res;
669 }
670
671 xmlBufUpdateInput(buf->buffer, in, used);
672 }
673 }
674
675 /**
676 * xmlParserInputShrink:
677 * @in: an XML parser input
678 *
679 * DEPRECATED: Don't use.
680 *
681 * This function removes used input for the parser.
682 */
683 void
xmlParserInputShrink(xmlParserInputPtr in)684 xmlParserInputShrink(xmlParserInputPtr in) {
685 size_t used;
686 size_t ret;
687
688 if (in == NULL) return;
689 if (in->buf == NULL) return;
690 if (in->base == NULL) return;
691 if (in->cur == NULL) return;
692 if (in->buf->buffer == NULL) return;
693
694 used = in->cur - in->base;
695
696 if (used > LINE_LEN) {
697 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
698 if (ret > 0) {
699 used -= ret;
700 if ((ret > ULONG_MAX) ||
701 (in->consumed > ULONG_MAX - (unsigned long)ret))
702 in->consumed = ULONG_MAX;
703 else
704 in->consumed += ret;
705 }
706
707 xmlBufUpdateInput(in->buf->buffer, in, used);
708 }
709 }
710
711 /************************************************************************
712 * *
713 * UTF8 character input and related functions *
714 * *
715 ************************************************************************/
716
717 /**
718 * xmlNextChar:
719 * @ctxt: the XML parser context
720 *
721 * DEPRECATED: Internal function, do not use.
722 *
723 * Skip to the next char input char.
724 */
725
726 void
xmlNextChar(xmlParserCtxtPtr ctxt)727 xmlNextChar(xmlParserCtxtPtr ctxt)
728 {
729 const unsigned char *cur;
730 size_t avail;
731 int c;
732
733 if ((ctxt == NULL) || (ctxt->input == NULL))
734 return;
735
736 avail = ctxt->input->end - ctxt->input->cur;
737
738 if (avail < INPUT_CHUNK) {
739 xmlParserGrow(ctxt);
740 if (ctxt->input->cur >= ctxt->input->end)
741 return;
742 avail = ctxt->input->end - ctxt->input->cur;
743 }
744
745 cur = ctxt->input->cur;
746 c = *cur;
747
748 if (c < 0x80) {
749 if (c == '\n') {
750 ctxt->input->cur++;
751 ctxt->input->line++;
752 ctxt->input->col = 1;
753 } else if (c == '\r') {
754 /*
755 * 2.11 End-of-Line Handling
756 * the literal two-character sequence "#xD#xA" or a standalone
757 * literal #xD, an XML processor must pass to the application
758 * the single character #xA.
759 */
760 ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
761 ctxt->input->line++;
762 ctxt->input->col = 1;
763 return;
764 } else {
765 ctxt->input->cur++;
766 ctxt->input->col++;
767 }
768 } else {
769 ctxt->input->col++;
770
771 if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
772 goto encoding_error;
773
774 if (c < 0xe0) {
775 /* 2-byte code */
776 if (c < 0xc2)
777 goto encoding_error;
778 ctxt->input->cur += 2;
779 } else {
780 unsigned int val = (c << 8) | cur[1];
781
782 if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
783 goto encoding_error;
784
785 if (c < 0xf0) {
786 /* 3-byte code */
787 if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
788 goto encoding_error;
789 ctxt->input->cur += 3;
790 } else {
791 if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
792 goto encoding_error;
793
794 /* 4-byte code */
795 if ((val < 0xf090) || (val >= 0xf490))
796 goto encoding_error;
797 ctxt->input->cur += 4;
798 }
799 }
800 }
801
802 return;
803
804 encoding_error:
805 /* Only report the first error */
806 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
807 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
808 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
809 }
810 ctxt->input->cur++;
811 }
812
813 /**
814 * xmlCurrentChar:
815 * @ctxt: the XML parser context
816 * @len: pointer to the length of the char read
817 *
818 * DEPRECATED: Internal function, do not use.
819 *
820 * The current char value, if using UTF-8 this may actually span multiple
821 * bytes in the input buffer. Implement the end of line normalization:
822 * 2.11 End-of-Line Handling
823 * Wherever an external parsed entity or the literal entity value
824 * of an internal parsed entity contains either the literal two-character
825 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
826 * must pass to the application the single character #xA.
827 * This behavior can conveniently be produced by normalizing all
828 * line breaks to #xA on input, before parsing.)
829 *
830 * Returns the current char value and its length
831 */
832
833 int
xmlCurrentChar(xmlParserCtxtPtr ctxt,int * len)834 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
835 const unsigned char *cur;
836 size_t avail;
837 int c;
838
839 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
840
841 avail = ctxt->input->end - ctxt->input->cur;
842
843 if (avail < INPUT_CHUNK) {
844 xmlParserGrow(ctxt);
845 avail = ctxt->input->end - ctxt->input->cur;
846 }
847
848 cur = ctxt->input->cur;
849 c = *cur;
850
851 if (c < 0x80) {
852 /* 1-byte code */
853 if (c < 0x20) {
854 /*
855 * 2.11 End-of-Line Handling
856 * the literal two-character sequence "#xD#xA" or a standalone
857 * literal #xD, an XML processor must pass to the application
858 * the single character #xA.
859 */
860 if (c == '\r') {
861 /*
862 * TODO: This function shouldn't change the 'cur' pointer
863 * as side effect, but the NEXTL macro in parser.c relies
864 * on this behavior when incrementing line numbers.
865 */
866 if (cur[1] == '\n')
867 ctxt->input->cur++;
868 *len = 1;
869 c = '\n';
870 } else if (c == 0) {
871 if (ctxt->input->cur >= ctxt->input->end) {
872 *len = 0;
873 } else {
874 *len = 1;
875 /*
876 * TODO: Null bytes should be handled by callers,
877 * but this can be tricky.
878 */
879 xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
880 "Char 0x0 out of allowed range\n");
881 }
882 } else {
883 *len = 1;
884 }
885 } else {
886 *len = 1;
887 }
888
889 return(c);
890 } else {
891 int val;
892
893 if (avail < 2)
894 goto incomplete_sequence;
895 if ((cur[1] & 0xc0) != 0x80)
896 goto encoding_error;
897
898 if (c < 0xe0) {
899 /* 2-byte code */
900 if (c < 0xc2)
901 goto encoding_error;
902 val = (c & 0x1f) << 6;
903 val |= cur[1] & 0x3f;
904 *len = 2;
905 } else {
906 if (avail < 3)
907 goto incomplete_sequence;
908 if ((cur[2] & 0xc0) != 0x80)
909 goto encoding_error;
910
911 if (c < 0xf0) {
912 /* 3-byte code */
913 val = (c & 0xf) << 12;
914 val |= (cur[1] & 0x3f) << 6;
915 val |= cur[2] & 0x3f;
916 if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
917 goto encoding_error;
918 *len = 3;
919 } else {
920 if (avail < 4)
921 goto incomplete_sequence;
922 if ((cur[3] & 0xc0) != 0x80)
923 goto encoding_error;
924
925 /* 4-byte code */
926 val = (c & 0x0f) << 18;
927 val |= (cur[1] & 0x3f) << 12;
928 val |= (cur[2] & 0x3f) << 6;
929 val |= cur[3] & 0x3f;
930 if ((val < 0x10000) || (val >= 0x110000))
931 goto encoding_error;
932 *len = 4;
933 }
934 }
935
936 return(val);
937 }
938
939 encoding_error:
940 /* Only report the first error */
941 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
942 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
943 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
944 }
945 *len = 1;
946 return(XML_INVALID_CHAR);
947
948 incomplete_sequence:
949 /*
950 * An encoding problem may arise from a truncated input buffer
951 * splitting a character in the middle. In that case do not raise
952 * an error but return 0. This should only happen when push parsing
953 * char data.
954 */
955 *len = 0;
956 return(0);
957 }
958
959 /**
960 * xmlStringCurrentChar:
961 * @ctxt: the XML parser context
962 * @cur: pointer to the beginning of the char
963 * @len: pointer to the length of the char read
964 *
965 * DEPRECATED: Internal function, do not use.
966 *
967 * The current char value, if using UTF-8 this may actually span multiple
968 * bytes in the input buffer.
969 *
970 * Returns the current char value and its length
971 */
972
973 int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,const xmlChar * cur,int * len)974 xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
975 const xmlChar *cur, int *len) {
976 int c;
977
978 if ((cur == NULL) || (len == NULL))
979 return(0);
980
981 /* cur is zero-terminated, so we can lie about its length. */
982 *len = 4;
983 c = xmlGetUTF8Char(cur, len);
984
985 return((c < 0) ? 0 : c);
986 }
987
988 /**
989 * xmlCopyCharMultiByte:
990 * @out: pointer to an array of xmlChar
991 * @val: the char value
992 *
993 * append the char value in the array
994 *
995 * Returns the number of xmlChar written
996 */
997 int
xmlCopyCharMultiByte(xmlChar * out,int val)998 xmlCopyCharMultiByte(xmlChar *out, int val) {
999 if ((out == NULL) || (val < 0)) return(0);
1000 /*
1001 * We are supposed to handle UTF8, check it's valid
1002 * From rfc2044: encoding of the Unicode values on UTF-8:
1003 *
1004 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1005 * 0000 0000-0000 007F 0xxxxxxx
1006 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1007 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1008 */
1009 if (val >= 0x80) {
1010 xmlChar *savedout = out;
1011 int bits;
1012 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1013 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1014 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1015 else {
1016 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1017 xmlAbort("xmlCopyCharMultiByte: codepoint out of range\n");
1018 #endif
1019 return(0);
1020 }
1021 for ( ; bits >= 0; bits-= 6)
1022 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1023 return (out - savedout);
1024 }
1025 *out = val;
1026 return 1;
1027 }
1028
1029 /**
1030 * xmlCopyChar:
1031 * @len: Ignored, compatibility
1032 * @out: pointer to an array of xmlChar
1033 * @val: the char value
1034 *
1035 * DEPRECATED: Don't use.
1036 *
1037 * append the char value in the array
1038 *
1039 * Returns the number of xmlChar written
1040 */
1041
1042 int
xmlCopyChar(int len ATTRIBUTE_UNUSED,xmlChar * out,int val)1043 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1044 if ((out == NULL) || (val < 0)) return(0);
1045 /* the len parameter is ignored */
1046 if (val >= 0x80) {
1047 return(xmlCopyCharMultiByte (out, val));
1048 }
1049 *out = val;
1050 return 1;
1051 }
1052
1053 /************************************************************************
1054 * *
1055 * Commodity functions to switch encodings *
1056 * *
1057 ************************************************************************/
1058
1059 /**
1060 * xmlCtxtSetCharEncConvImpl:
1061 * @ctxt: parser context
1062 * @impl: callback
1063 * @vctxt: user data
1064 *
1065 * Installs a custom implementation to convert between character
1066 * encodings.
1067 *
1068 * This bypasses legacy feature like global encoding handlers or
1069 * encoding aliases.
1070 *
1071 * Available since 2.14.0.
1072 */
1073 void
xmlCtxtSetCharEncConvImpl(xmlParserCtxtPtr ctxt,xmlCharEncConvImpl impl,void * vctxt)1074 xmlCtxtSetCharEncConvImpl(xmlParserCtxtPtr ctxt, xmlCharEncConvImpl impl,
1075 void *vctxt) {
1076 if (ctxt == NULL)
1077 return;
1078
1079 ctxt->convImpl = impl;
1080 ctxt->convCtxt = vctxt;
1081 }
1082
1083 static int
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt,xmlCharEncodingHandlerPtr * hout)1084 xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
1085 xmlChar out[200];
1086 xmlParserInputPtr input = ctxt->input;
1087 xmlCharEncodingHandlerPtr handler;
1088 int inlen, outlen, res, i;
1089
1090 *hout = NULL;
1091
1092 /*
1093 * To detect the EBCDIC code page, we convert the first 200 bytes
1094 * to IBM037 (EBCDIC-US) and try to find the encoding declaration.
1095 */
1096 res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
1097 ctxt->convImpl, ctxt->convCtxt, &handler);
1098 if (res != 0)
1099 return(res);
1100 outlen = sizeof(out) - 1;
1101 inlen = input->end - input->cur;
1102 res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
1103 /*
1104 * Return the EBCDIC handler if decoding failed. The error will
1105 * be reported later.
1106 */
1107 if (res < 0)
1108 goto done;
1109 out[outlen] = 0;
1110
1111 for (i = 0; i < outlen; i++) {
1112 if (out[i] == '>')
1113 break;
1114 if ((out[i] == 'e') &&
1115 (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1116 int start, cur, quote;
1117
1118 i += 8;
1119 while (IS_BLANK_CH(out[i]))
1120 i += 1;
1121 if (out[i++] != '=')
1122 break;
1123 while (IS_BLANK_CH(out[i]))
1124 i += 1;
1125 quote = out[i++];
1126 if ((quote != '\'') && (quote != '"'))
1127 break;
1128 start = i;
1129 cur = out[i];
1130 while (((cur >= 'a') && (cur <= 'z')) ||
1131 ((cur >= 'A') && (cur <= 'Z')) ||
1132 ((cur >= '0') && (cur <= '9')) ||
1133 (cur == '.') || (cur == '_') ||
1134 (cur == '-'))
1135 cur = out[++i];
1136 if (cur != quote)
1137 break;
1138 out[i] = 0;
1139 xmlCharEncCloseFunc(handler);
1140 res = xmlCreateCharEncodingHandler((char *) out + start,
1141 /* output */ 0, ctxt->convImpl, ctxt->convCtxt,
1142 &handler);
1143 if (res != 0)
1144 return(res);
1145 *hout = handler;
1146 return(0);
1147 }
1148 }
1149
1150 done:
1151 /*
1152 * Encoding handlers are stateful, so we have to recreate them.
1153 */
1154 xmlCharEncCloseFunc(handler);
1155 res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
1156 ctxt->convImpl, ctxt->convCtxt, &handler);
1157 if (res != 0)
1158 return(res);
1159 *hout = handler;
1160 return(0);
1161 }
1162
1163 /**
1164 * xmlSwitchEncoding:
1165 * @ctxt: the parser context
1166 * @enc: the encoding value (number)
1167 *
1168 * Use encoding specified by enum to decode input data. This overrides
1169 * the encoding found in the XML declaration.
1170 *
1171 * This function can also be used to override the encoding of chunks
1172 * passed to xmlParseChunk.
1173 *
1174 * Returns 0 in case of success, -1 otherwise
1175 */
1176 int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt,xmlCharEncoding enc)1177 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1178 {
1179 xmlCharEncodingHandlerPtr handler = NULL;
1180 int ret;
1181 int res;
1182
1183 if ((ctxt == NULL) || (ctxt->input == NULL))
1184 return(-1);
1185
1186 res = xmlLookupCharEncodingHandler(enc, &handler);
1187 if (res != 0) {
1188 xmlFatalErr(ctxt, res, NULL);
1189 return(-1);
1190 }
1191
1192 ret = xmlSwitchToEncoding(ctxt, handler);
1193
1194 if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1195 ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1196 }
1197
1198 return(ret);
1199 }
1200
1201 /**
1202 * xmlSwitchInputEncodingName:
1203 * @ctxt: the parser context
1204 * @input: the input strea,
1205 * @encoding: the encoding name
1206 *
1207 * Returns 0 in case of success, -1 otherwise
1208 */
1209 static int
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,const char * encoding)1210 xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1211 const char *encoding) {
1212 xmlCharEncodingHandlerPtr handler;
1213 int res;
1214
1215 if (encoding == NULL)
1216 return(-1);
1217
1218 res = xmlCreateCharEncodingHandler(encoding, /* output */ 0,
1219 ctxt->convImpl, ctxt->convCtxt, &handler);
1220 if (res == XML_ERR_UNSUPPORTED_ENCODING) {
1221 xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1222 "Unsupported encoding: %s\n", BAD_CAST encoding, NULL);
1223 return(-1);
1224 } else if (res != XML_ERR_OK) {
1225 xmlFatalErr(ctxt, res, encoding);
1226 return(-1);
1227 }
1228
1229 res = xmlInputSetEncodingHandler(input, handler);
1230 if (res != XML_ERR_OK) {
1231 xmlCtxtErrIO(ctxt, res, NULL);
1232 return(-1);
1233 }
1234
1235 return(0);
1236 }
1237
1238 /**
1239 * xmlSwitchEncodingName:
1240 * @ctxt: the parser context
1241 * @encoding: the encoding name
1242 *
1243 * Use specified encoding to decode input data. This overrides the
1244 * encoding found in the XML declaration.
1245 *
1246 * This function can also be used to override the encoding of chunks
1247 * passed to xmlParseChunk.
1248 *
1249 * Available since 2.13.0.
1250 *
1251 * Returns 0 in case of success, -1 otherwise
1252 */
1253 int
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt,const char * encoding)1254 xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
1255 if (ctxt == NULL)
1256 return(-1);
1257
1258 return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1259 }
1260
/**
 * xmlInputSetEncodingHandler:
 * @input:  the input stream
 * @handler:  the encoding handler
 *
 * Use encoding handler to decode input data.
 *
 * The handler is closed here if @input (or its buffer) is missing, if
 * the handler is a UTF-8 handler (which is not needed), or if the new
 * buffer can't be allocated. Otherwise the handler is stored in the
 * parser input buffer of @input.
 *
 * If content is already buffered and no decoder was installed before,
 * the unconsumed part of that content is passed to xmlCharEncInput.
 *
 * Returns an xmlParserErrors code.
 */
int
xmlInputSetEncodingHandler(xmlParserInputPtr input,
                           xmlCharEncodingHandlerPtr handler) {
    xmlParserInputBufferPtr in;
    xmlBufPtr buf;
    int code = XML_ERR_OK;

    /* Without an input buffer there is nothing to attach the handler to. */
    if ((input == NULL) || (input->buf == NULL)) {
        xmlCharEncCloseFunc(handler);
        return(XML_ERR_ARGUMENT);
    }
    in = input->buf;

    /* The flag is set even if no handler ends up being installed below. */
    input->flags |= XML_INPUT_HAS_ENCODING;

    /*
     * UTF-8 requires no encoding handler.
     */
    if ((handler != NULL) &&
        (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
        xmlCharEncCloseFunc(handler);
        handler = NULL;
    }

    /*
     * Nothing to do if the requested handler is the installed one. This
     * includes the case where both are NULL.
     */
    if (in->encoder == handler)
        return(XML_ERR_OK);

    if (in->encoder != NULL) {
        /*
         * Switching encodings during parsing is a really bad idea,
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
         * separate calls to xmlParseChunk.
         *
         * TODO: We should check whether the "raw" input buffer is empty and
         * convert the old content using the old encoder.
         */

        /* Only the encoder is replaced; the buffers are left as they are. */
        xmlCharEncCloseFunc(in->encoder);
        in->encoder = handler;
        return(XML_ERR_OK);
    }

    buf = xmlBufCreate(XML_IO_BUFFER_SIZE);
    if (buf == NULL) {
        xmlCharEncCloseFunc(handler);
        return(XML_ERR_NO_MEMORY);
    }

    /*
     * First encoder for this input: the buffer used so far becomes the
     * raw buffer and the freshly created one becomes the input buffer.
     */
    in->encoder = handler;
    in->raw = in->buffer;
    in->buffer = buf;

    /*
     * Is there already some content down the pipe to convert ?
     */
    if (input->end > input->base) {
        size_t processed;
        size_t nbchars;
        int res;

        /*
         * Shrink the current input buffer.
         * Move it as the raw buffer and create a new input buffer
         */
        /* Bytes between base and cur are dropped from the raw buffer. */
        processed = input->cur - input->base;
        xmlBufShrink(in->raw, processed);
        input->consumed += processed;
        in->rawconsumed = processed;

        /*
         * NOTE(review): nbchars is presumably the amount xmlCharEncInput
         * is asked to convert -- confirm against its definition.
         */
        nbchars = 4000 /* MINLEN */;
        res = xmlCharEncInput(in, &nbchars);
        if (res < 0)
            code = in->error;
    }

    /*
     * Done on both the success and the conversion failure path.
     * NOTE(review): presumably re-points the input's base/cur/end at the
     * new buffer -- confirm against xmlBufResetInput.
     */
    xmlBufResetInput(in->buffer, input);

    return(code);
}
1351
1352 /**
1353 * xmlSwitchInputEncoding:
1354 * @ctxt: the parser context, only for error reporting
1355 * @input: the input stream
1356 * @handler: the encoding handler
1357 *
1358 * DEPRECATED: Internal function, don't use.
1359 *
1360 * Use encoding handler to decode input data.
1361 *
1362 * Returns 0 in case of success, -1 otherwise
1363 */
1364 int
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,xmlCharEncodingHandlerPtr handler)1365 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1366 xmlCharEncodingHandlerPtr handler) {
1367 int code = xmlInputSetEncodingHandler(input, handler);
1368
1369 if (code != XML_ERR_OK) {
1370 xmlCtxtErrIO(ctxt, code, NULL);
1371 return(-1);
1372 }
1373
1374 return(0);
1375 }
1376
1377 /**
1378 * xmlSwitchToEncoding:
1379 * @ctxt: the parser context
1380 * @handler: the encoding handler
1381 *
1382 * Use encoding handler to decode input data.
1383 *
1384 * This function can be used to enforce the encoding of chunks passed
1385 * to xmlParseChunk.
1386 *
1387 * Returns 0 in case of success, -1 otherwise
1388 */
1389 int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt,xmlCharEncodingHandlerPtr handler)1390 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1391 {
1392 int code;
1393
1394 if (ctxt == NULL)
1395 return(-1);
1396
1397 code = xmlInputSetEncodingHandler(ctxt->input, handler);
1398 if (code != XML_ERR_OK) {
1399 xmlCtxtErrIO(ctxt, code, NULL);
1400 return(-1);
1401 }
1402
1403 return(0);
1404 }
1405
1406 /**
1407 * xmlDetectEncoding:
1408 * @ctxt: the parser context
1409 *
1410 * Handle optional BOM, detect and switch to encoding.
1411 *
1412 * Assumes that there are at least four bytes in the input buffer.
1413 */
1414 void
xmlDetectEncoding(xmlParserCtxtPtr ctxt)1415 xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1416 const xmlChar *in;
1417 xmlCharEncoding enc;
1418 int bomSize;
1419 int autoFlag = 0;
1420
1421 if (xmlParserGrow(ctxt) < 0)
1422 return;
1423 in = ctxt->input->cur;
1424 if (ctxt->input->end - in < 4)
1425 return;
1426
1427 if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1428 /*
1429 * If the encoding was already set, only skip the BOM which was
1430 * possibly decoded to UTF-8.
1431 */
1432 if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1433 ctxt->input->cur += 3;
1434 }
1435
1436 return;
1437 }
1438
1439 enc = XML_CHAR_ENCODING_NONE;
1440 bomSize = 0;
1441
1442 switch (in[0]) {
1443 case 0x00:
1444 if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1445 enc = XML_CHAR_ENCODING_UCS4BE;
1446 autoFlag = XML_INPUT_AUTO_OTHER;
1447 } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1448 enc = XML_CHAR_ENCODING_UTF16BE;
1449 autoFlag = XML_INPUT_AUTO_UTF16BE;
1450 }
1451 break;
1452
1453 case 0x3C:
1454 if (in[1] == 0x00) {
1455 if ((in[2] == 0x00) && (in[3] == 0x00)) {
1456 enc = XML_CHAR_ENCODING_UCS4LE;
1457 autoFlag = XML_INPUT_AUTO_OTHER;
1458 } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1459 enc = XML_CHAR_ENCODING_UTF16LE;
1460 autoFlag = XML_INPUT_AUTO_UTF16LE;
1461 }
1462 }
1463 break;
1464
1465 case 0x4C:
1466 if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1467 enc = XML_CHAR_ENCODING_EBCDIC;
1468 autoFlag = XML_INPUT_AUTO_OTHER;
1469 }
1470 break;
1471
1472 case 0xEF:
1473 if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1474 enc = XML_CHAR_ENCODING_UTF8;
1475 autoFlag = XML_INPUT_AUTO_UTF8;
1476 bomSize = 3;
1477 }
1478 break;
1479
1480 case 0xFE:
1481 if (in[1] == 0xFF) {
1482 enc = XML_CHAR_ENCODING_UTF16BE;
1483 autoFlag = XML_INPUT_AUTO_UTF16BE;
1484 bomSize = 2;
1485 }
1486 break;
1487
1488 case 0xFF:
1489 if (in[1] == 0xFE) {
1490 enc = XML_CHAR_ENCODING_UTF16LE;
1491 autoFlag = XML_INPUT_AUTO_UTF16LE;
1492 bomSize = 2;
1493 }
1494 break;
1495 }
1496
1497 if (bomSize > 0) {
1498 ctxt->input->cur += bomSize;
1499 }
1500
1501 if (enc != XML_CHAR_ENCODING_NONE) {
1502 ctxt->input->flags |= autoFlag;
1503
1504 if (enc == XML_CHAR_ENCODING_EBCDIC) {
1505 xmlCharEncodingHandlerPtr handler;
1506 int res;
1507
1508 res = xmlDetectEBCDIC(ctxt, &handler);
1509 if (res != XML_ERR_OK) {
1510 xmlFatalErr(ctxt, res, "detecting EBCDIC\n");
1511 } else {
1512 xmlSwitchToEncoding(ctxt, handler);
1513 }
1514 } else {
1515 xmlSwitchEncoding(ctxt, enc);
1516 }
1517 }
1518 }
1519
1520 /**
1521 * xmlSetDeclaredEncoding:
1522 * @ctxt: the parser context
1523 * @encoding: declared encoding
1524 *
1525 * Set the encoding from a declaration in the document.
1526 *
1527 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1528 * about encoding mismatches.
1529 *
1530 * Takes ownership of 'encoding'.
1531 */
1532 void
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt,xmlChar * encoding)1533 xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1534 if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1535 ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1536 xmlCharEncodingHandlerPtr handler;
1537 int res;
1538
1539 /*
1540 * xmlSwitchEncodingName treats unsupported encodings as
1541 * warnings, but we want it to be an error in an encoding
1542 * declaration.
1543 */
1544 res = xmlCreateCharEncodingHandler((const char *) encoding,
1545 /* output */ 0, ctxt->convImpl, ctxt->convCtxt, &handler);
1546 if (res != XML_ERR_OK) {
1547 xmlFatalErr(ctxt, res, (const char *) encoding);
1548 xmlFree(encoding);
1549 return;
1550 }
1551
1552 res = xmlInputSetEncodingHandler(ctxt->input, handler);
1553 if (res != XML_ERR_OK) {
1554 xmlCtxtErrIO(ctxt, res, NULL);
1555 xmlFree(encoding);
1556 return;
1557 }
1558
1559 ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1560 } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1561 static const char *allowedUTF8[] = {
1562 "UTF-8", "UTF8", NULL
1563 };
1564 static const char *allowedUTF16LE[] = {
1565 "UTF-16", "UTF-16LE", "UTF16", NULL
1566 };
1567 static const char *allowedUTF16BE[] = {
1568 "UTF-16", "UTF-16BE", "UTF16", NULL
1569 };
1570 const char **allowed = NULL;
1571 const char *autoEnc = NULL;
1572
1573 switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1574 case XML_INPUT_AUTO_UTF8:
1575 allowed = allowedUTF8;
1576 autoEnc = "UTF-8";
1577 break;
1578 case XML_INPUT_AUTO_UTF16LE:
1579 allowed = allowedUTF16LE;
1580 autoEnc = "UTF-16LE";
1581 break;
1582 case XML_INPUT_AUTO_UTF16BE:
1583 allowed = allowedUTF16BE;
1584 autoEnc = "UTF-16BE";
1585 break;
1586 }
1587
1588 if (allowed != NULL) {
1589 const char **p;
1590 int match = 0;
1591
1592 for (p = allowed; *p != NULL; p++) {
1593 if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1594 match = 1;
1595 break;
1596 }
1597 }
1598
1599 if (match == 0) {
1600 xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1601 "Encoding '%s' doesn't match "
1602 "auto-detected '%s'\n",
1603 encoding, BAD_CAST autoEnc);
1604 xmlFree(encoding);
1605 encoding = xmlStrdup(BAD_CAST autoEnc);
1606 if (encoding == NULL)
1607 xmlCtxtErrMemory(ctxt);
1608 }
1609 }
1610 }
1611
1612 if (ctxt->encoding != NULL)
1613 xmlFree((xmlChar *) ctxt->encoding);
1614 ctxt->encoding = encoding;
1615 }
1616
1617 /**
1618 * xmlCtxtGetDeclaredEncoding:
1619 * ctxt: parser context
1620 *
1621 * Available since 2.14.0.
1622 *
1623 * Returns the encoding from the encoding declaration. This can differ
1624 * from the actual encoding.
1625 */
1626 const xmlChar *
xmlCtxtGetDeclaredEncoding(xmlParserCtxtPtr ctxt)1627 xmlCtxtGetDeclaredEncoding(xmlParserCtxtPtr ctxt) {
1628 if (ctxt == NULL)
1629 return(NULL);
1630
1631 return(ctxt->encoding);
1632 }
1633
1634 /**
1635 * xmlGetActualEncoding:
1636 * @ctxt: the parser context
1637 *
1638 * Returns the actual used to parse the document. This can differ from
1639 * the declared encoding.
1640 */
1641 const xmlChar *
xmlGetActualEncoding(xmlParserCtxtPtr ctxt)1642 xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1643 const xmlChar *encoding = NULL;
1644
1645 if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1646 (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1647 /* Preserve encoding exactly */
1648 encoding = ctxt->encoding;
1649 } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1650 encoding = BAD_CAST ctxt->input->buf->encoder->name;
1651 } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1652 encoding = BAD_CAST "UTF-8";
1653 }
1654
1655 return(encoding);
1656 }
1657
1658 /************************************************************************
1659 * *
1660 * Commodity functions to handle entities processing *
1661 * *
1662 ************************************************************************/
1663
1664 /**
1665 * xmlFreeInputStream:
1666 * @input: an xmlParserInputPtr
1667 *
1668 * Free up an input stream.
1669 */
1670 void
xmlFreeInputStream(xmlParserInputPtr input)1671 xmlFreeInputStream(xmlParserInputPtr input) {
1672 if (input == NULL) return;
1673
1674 if (input->filename != NULL) xmlFree((char *) input->filename);
1675 if (input->version != NULL) xmlFree((char *) input->version);
1676 if ((input->free != NULL) && (input->base != NULL))
1677 input->free((xmlChar *) input->base);
1678 if (input->buf != NULL)
1679 xmlFreeParserInputBuffer(input->buf);
1680 xmlFree(input);
1681 }
1682
1683 /**
1684 * xmlNewInputStream:
1685 * @ctxt: an XML parser context
1686 *
1687 * DEPRECATED: Use xmlNewInputFromUrl or similar functions.
1688 *
1689 * Create a new input stream structure.
1690 *
1691 * Returns the new input stream or NULL
1692 */
1693 xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt)1694 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1695 xmlParserInputPtr input;
1696
1697 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1698 if (input == NULL) {
1699 xmlCtxtErrMemory(ctxt);
1700 return(NULL);
1701 }
1702 memset(input, 0, sizeof(xmlParserInput));
1703 input->line = 1;
1704 input->col = 1;
1705
1706 return(input);
1707 }
1708
1709 /**
1710 * xmlCtxtNewInputFromUrl:
1711 * @ctxt: parser context
1712 * @url: filename or URL
1713 * @publicId: publid ID from doctype (optional)
1714 * @encoding: character encoding (optional)
1715 * @flags: unused, pass 0
1716 *
1717 * Creates a new parser input from the filesystem, the network or
1718 * a user-defined resource loader.
1719 *
1720 * Returns a new parser input.
1721 */
1722 xmlParserInputPtr
xmlCtxtNewInputFromUrl(xmlParserCtxtPtr ctxt,const char * url,const char * publicId,const char * encoding,int flags ATTRIBUTE_UNUSED)1723 xmlCtxtNewInputFromUrl(xmlParserCtxtPtr ctxt, const char *url,
1724 const char *publicId, const char *encoding,
1725 int flags ATTRIBUTE_UNUSED) {
1726 xmlParserInputPtr input;
1727
1728 if ((ctxt == NULL) || (url == NULL))
1729 return(NULL);
1730
1731 input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1732 if (input == NULL)
1733 return(NULL);
1734
1735 if (encoding != NULL)
1736 xmlSwitchInputEncodingName(ctxt, input, encoding);
1737
1738 return(input);
1739 }
1740
1741 /**
1742 * xmlNewInputInternal:
1743 * @buf: parser input buffer
1744 * @filename: filename or URL
1745 *
1746 * Internal helper function.
1747 *
1748 * Returns a new parser input.
1749 */
1750 static xmlParserInputPtr
xmlNewInputInternal(xmlParserInputBufferPtr buf,const char * filename)1751 xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1752 xmlParserInputPtr input;
1753
1754 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1755 if (input == NULL) {
1756 xmlFreeParserInputBuffer(buf);
1757 return(NULL);
1758 }
1759 memset(input, 0, sizeof(xmlParserInput));
1760 input->line = 1;
1761 input->col = 1;
1762
1763 input->buf = buf;
1764 xmlBufResetInput(input->buf->buffer, input);
1765
1766 if (filename != NULL) {
1767 input->filename = xmlMemStrdup(filename);
1768 if (input->filename == NULL) {
1769 xmlFreeInputStream(input);
1770 return(NULL);
1771 }
1772 }
1773
1774 return(input);
1775 }
1776
1777 /**
1778 * xmlNewInputFromMemory:
1779 * @url: base URL (optional)
1780 * @mem: pointer to char array
1781 * @size: size of array
1782 * @flags: optimization hints
1783 *
1784 * Creates a new parser input to read from a memory area.
1785 *
1786 * @url is used as base to resolve external entities and for
1787 * error reporting.
1788 *
1789 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1790 * stay unchanged until parsing has finished. This can avoid
1791 * temporary copies.
1792 *
1793 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1794 * area must contain a zero byte after the buffer at position @size.
1795 * This can avoid temporary copies.
1796 *
1797 * Available since 2.14.0.
1798 *
1799 * Returns a new parser input or NULL if a memory allocation failed.
1800 */
1801 xmlParserInputPtr
xmlNewInputFromMemory(const char * url,const void * mem,size_t size,int flags)1802 xmlNewInputFromMemory(const char *url, const void *mem, size_t size,
1803 int flags) {
1804 xmlParserInputBufferPtr buf;
1805
1806 if (mem == NULL)
1807 return(NULL);
1808
1809 buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1810 if (buf == NULL)
1811 return(NULL);
1812
1813 return(xmlNewInputInternal(buf, url));
1814 }
1815
1816 /**
1817 * xmlCtxtNewInputFromMemory:
1818 * @ctxt: parser context
1819 * @url: base URL (optional)
1820 * @mem: pointer to char array
1821 * @size: size of array
1822 * @encoding: character encoding (optional)
1823 * @flags: optimization hints
1824 *
1825 * Returns a new parser input or NULL in case of error.
1826 */
1827 xmlParserInputPtr
xmlCtxtNewInputFromMemory(xmlParserCtxtPtr ctxt,const char * url,const void * mem,size_t size,const char * encoding,int flags)1828 xmlCtxtNewInputFromMemory(xmlParserCtxtPtr ctxt, const char *url,
1829 const void *mem, size_t size,
1830 const char *encoding, int flags) {
1831 xmlParserInputPtr input;
1832
1833 if ((ctxt == NULL) || (mem == NULL))
1834 return(NULL);
1835
1836 input = xmlNewInputFromMemory(url, mem, size, flags);
1837 if (input == NULL) {
1838 xmlCtxtErrMemory(ctxt);
1839 return(NULL);
1840 }
1841
1842 if (encoding != NULL)
1843 xmlSwitchInputEncodingName(ctxt, input, encoding);
1844
1845 return(input);
1846 }
1847
1848 /**
1849 * xmlNewInputFromString:
1850 * @url: base URL (optional)
1851 * @str: zero-terminated string
1852 * @flags: optimization hints
1853 *
1854 * Creates a new parser input to read from a zero-terminated string.
1855 *
1856 * @url is used as base to resolve external entities and for
1857 * error reporting.
1858 *
1859 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1860 * stay unchanged until parsing has finished. This can avoid
1861 * temporary copies.
1862 *
1863 * Available since 2.14.0.
1864 *
1865 * Returns a new parser input or NULL if a memory allocation failed.
1866 */
1867 xmlParserInputPtr
xmlNewInputFromString(const char * url,const char * str,int flags)1868 xmlNewInputFromString(const char *url, const char *str, int flags) {
1869 xmlParserInputBufferPtr buf;
1870
1871 if (str == NULL)
1872 return(NULL);
1873
1874 buf = xmlNewInputBufferString(str, flags);
1875 if (buf == NULL)
1876 return(NULL);
1877
1878 return(xmlNewInputInternal(buf, url));
1879 }
1880
1881 /**
1882 * xmlCtxtNewInputFromString:
1883 * @ctxt: parser context
1884 * @url: base URL (optional)
1885 * @str: zero-terminated string
1886 * @encoding: character encoding (optional)
1887 * @flags: optimization hints
1888 *
1889 * Returns a new parser input.
1890 */
1891 xmlParserInputPtr
xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt,const char * url,const char * str,const char * encoding,int flags)1892 xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
1893 const char *str, const char *encoding, int flags) {
1894 xmlParserInputPtr input;
1895
1896 if ((ctxt == NULL) || (str == NULL))
1897 return(NULL);
1898
1899 input = xmlNewInputFromString(url, str, flags);
1900 if (input == NULL) {
1901 xmlCtxtErrMemory(ctxt);
1902 return(NULL);
1903 }
1904
1905 if (encoding != NULL)
1906 xmlSwitchInputEncodingName(ctxt, input, encoding);
1907
1908 return(input);
1909 }
1910
1911 /**
1912 * xmlNewInputFromFd:
1913 * @url: base URL (optional)
1914 * @fd: file descriptor
1915 * @flags: unused, pass 0
1916 *
1917 * Creates a new parser input to read from a zero-terminated string.
1918 *
1919 * @url is used as base to resolve external entities and for
1920 * error reporting.
1921 *
1922 * @fd is closed after parsing has finished.
1923 *
1924 * Available since 2.14.0.
1925 *
1926 * Returns a new parser input or NULL if a memory allocation failed.
1927 */
1928 xmlParserInputPtr
xmlNewInputFromFd(const char * url,int fd,int flags ATTRIBUTE_UNUSED)1929 xmlNewInputFromFd(const char *url, int fd, int flags ATTRIBUTE_UNUSED) {
1930 xmlParserInputBufferPtr buf;
1931
1932 if (fd < 0)
1933 return(NULL);
1934
1935 buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
1936 if (buf == NULL)
1937 return(NULL);
1938
1939 return(xmlNewInputInternal(buf, url));
1940 }
1941
1942 /**
1943 * xmlCtxtNewInputFromFd:
1944 * @ctxt: parser context
1945 * @url: base URL (optional)
1946 * @fd: file descriptor
1947 * @encoding: character encoding (optional)
1948 * @flags: unused, pass 0
1949 *
1950 * Returns a new parser input.
1951 */
1952 xmlParserInputPtr
xmlCtxtNewInputFromFd(xmlParserCtxtPtr ctxt,const char * url,int fd,const char * encoding,int flags)1953 xmlCtxtNewInputFromFd(xmlParserCtxtPtr ctxt, const char *url,
1954 int fd, const char *encoding, int flags) {
1955 xmlParserInputPtr input;
1956
1957 if ((ctxt == NULL) || (fd < 0))
1958 return(NULL);
1959
1960 input = xmlNewInputFromFd(url, fd, flags);
1961 if (input == NULL) {
1962 xmlCtxtErrMemory(ctxt);
1963 return(NULL);
1964 }
1965
1966 if (encoding != NULL)
1967 xmlSwitchInputEncodingName(ctxt, input, encoding);
1968
1969 return(input);
1970 }
1971
1972 /**
1973 * xmlNewInputFromIO:
1974 * @url: base URL (optional)
1975 * @ioRead: read callback
1976 * @ioClose: close callback (optional)
1977 * @ioCtxt: IO context
1978 * @flags: unused, pass 0
1979 *
1980 * Creates a new parser input to read from input callbacks and
1981 * cintext.
1982 *
1983 * @url is used as base to resolve external entities and for
1984 * error reporting.
1985 *
1986 * @ioRead is called to read new data into a provided buffer.
1987 * It must return the number of bytes written into the buffer
1988 * ot a negative xmlParserErrors code on failure.
1989 *
1990 * @ioClose is called after parsing has finished.
1991 *
1992 * @ioCtxt is an opaque pointer passed to the callbacks.
1993 *
1994 * Available since 2.14.0.
1995 *
1996 * Returns a new parser input or NULL if a memory allocation failed.
1997 */
1998 xmlParserInputPtr
xmlNewInputFromIO(const char * url,xmlInputReadCallback ioRead,xmlInputCloseCallback ioClose,void * ioCtxt,int flags ATTRIBUTE_UNUSED)1999 xmlNewInputFromIO(const char *url, xmlInputReadCallback ioRead,
2000 xmlInputCloseCallback ioClose, void *ioCtxt,
2001 int flags ATTRIBUTE_UNUSED) {
2002 xmlParserInputBufferPtr buf;
2003
2004 if (ioRead == NULL)
2005 return(NULL);
2006
2007 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2008 if (buf == NULL) {
2009 if (ioClose != NULL)
2010 ioClose(ioCtxt);
2011 return(NULL);
2012 }
2013
2014 buf->context = ioCtxt;
2015 buf->readcallback = ioRead;
2016 buf->closecallback = ioClose;
2017
2018 return(xmlNewInputInternal(buf, url));
2019 }
2020
2021 /**
2022 * xmlCtxtNewInputFromIO:
2023 * @ctxt: parser context
2024 * @url: base URL (optional)
2025 * @ioRead: read callback
2026 * @ioClose: close callback (optional)
2027 * @ioCtxt: IO context
2028 * @encoding: character encoding (optional)
2029 * @flags: unused, pass 0
2030 *
2031 * Returns a new parser input.
2032 */
2033 xmlParserInputPtr
xmlCtxtNewInputFromIO(xmlParserCtxtPtr ctxt,const char * url,xmlInputReadCallback ioRead,xmlInputCloseCallback ioClose,void * ioCtxt,const char * encoding,int flags)2034 xmlCtxtNewInputFromIO(xmlParserCtxtPtr ctxt, const char *url,
2035 xmlInputReadCallback ioRead,
2036 xmlInputCloseCallback ioClose,
2037 void *ioCtxt, const char *encoding, int flags) {
2038 xmlParserInputPtr input;
2039
2040 if ((ctxt == NULL) || (ioRead == NULL))
2041 return(NULL);
2042
2043 input = xmlNewInputFromIO(url, ioRead, ioClose, ioCtxt, flags);
2044 if (input == NULL) {
2045 xmlCtxtErrMemory(ctxt);
2046 return(NULL);
2047 }
2048
2049 if (encoding != NULL)
2050 xmlSwitchInputEncodingName(ctxt, input, encoding);
2051
2052 return(input);
2053 }
2054
2055 /**
2056 * xmlNewPushInput:
2057 * @url: base URL (optional)
2058 * @chunk: pointer to char array
2059 * @size: size of array
2060 *
2061 * Creates a new parser input for a push parser.
2062 *
2063 * Returns a new parser input or NULL if a memory allocation failed.
2064 */
2065 xmlParserInputPtr
xmlNewPushInput(const char * url,const char * chunk,int size)2066 xmlNewPushInput(const char *url, const char *chunk, int size) {
2067 xmlParserInputBufferPtr buf;
2068 xmlParserInputPtr input;
2069
2070 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2071 if (buf == NULL)
2072 return(NULL);
2073
2074 input = xmlNewInputInternal(buf, url);
2075 if (input == NULL)
2076 return(NULL);
2077
2078 input->flags |= XML_INPUT_PROGRESSIVE;
2079
2080 if ((size > 0) && (chunk != NULL)) {
2081 int res;
2082
2083 res = xmlParserInputBufferPush(input->buf, size, chunk);
2084 xmlBufResetInput(input->buf->buffer, input);
2085 if (res < 0) {
2086 xmlFreeInputStream(input);
2087 return(NULL);
2088 }
2089 }
2090
2091 return(input);
2092 }
2093
2094 /**
2095 * xmlNewIOInputStream:
2096 * @ctxt: an XML parser context
2097 * @buf: an input buffer
2098 * @enc: the charset encoding if known
2099 *
2100 * Create a new input stream structure encapsulating the @input into
2101 * a stream suitable for the parser.
2102 *
2103 * Returns the new input stream or NULL
2104 */
2105 xmlParserInputPtr
xmlNewIOInputStream(xmlParserCtxtPtr ctxt,xmlParserInputBufferPtr buf,xmlCharEncoding enc)2106 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
2107 xmlCharEncoding enc) {
2108 xmlParserInputPtr input;
2109 const char *encoding;
2110
2111 if ((ctxt == NULL) || (buf == NULL))
2112 return(NULL);
2113
2114 input = xmlNewInputInternal(buf, NULL);
2115 if (input == NULL) {
2116 xmlCtxtErrMemory(ctxt);
2117 return(NULL);
2118 }
2119
2120 encoding = xmlGetCharEncodingName(enc);
2121 if (encoding != NULL)
2122 xmlSwitchInputEncodingName(ctxt, input, encoding);
2123
2124 return(input);
2125 }
2126
2127 /**
2128 * xmlNewEntityInputStream:
2129 * @ctxt: an XML parser context
2130 * @ent: an Entity pointer
2131 *
2132 * DEPRECATED: Internal function, do not use.
2133 *
2134 * Create a new input stream based on an xmlEntityPtr
2135 *
2136 * Returns the new input stream or NULL
2137 */
2138 xmlParserInputPtr
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)2139 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
2140 xmlParserInputPtr input;
2141
2142 if ((ctxt == NULL) || (ent == NULL))
2143 return(NULL);
2144
2145 if (ent->content != NULL) {
2146 input = xmlCtxtNewInputFromString(ctxt, NULL,
2147 (const char *) ent->content, NULL, XML_INPUT_BUF_STATIC);
2148 } else if (ent->URI != NULL) {
2149 xmlResourceType rtype;
2150
2151 if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
2152 rtype = XML_RESOURCE_PARAMETER_ENTITY;
2153 else
2154 rtype = XML_RESOURCE_GENERAL_ENTITY;
2155
2156 input = xmlLoadResource(ctxt, (char *) ent->URI,
2157 (char *) ent->ExternalID, rtype);
2158 } else {
2159 return(NULL);
2160 }
2161
2162 if (input == NULL)
2163 return(NULL);
2164
2165 input->entity = ent;
2166
2167 return(input);
2168 }
2169
2170 /**
2171 * xmlNewStringInputStream:
2172 * @ctxt: an XML parser context
2173 * @buffer: an memory buffer
2174 *
2175 * DEPRECATED: Use xmlNewInputFromString.
2176 *
2177 * Create a new input stream based on a memory buffer.
2178 *
2179 * Returns the new input stream
2180 */
2181 xmlParserInputPtr
xmlNewStringInputStream(xmlParserCtxtPtr ctxt,const xmlChar * buffer)2182 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2183 return(xmlCtxtNewInputFromString(ctxt, NULL, (const char *) buffer,
2184 NULL, 0));
2185 }
2186
2187
2188 /****************************************************************
2189 * *
2190 * External entities loading *
2191 * *
2192 ****************************************************************/
2193
2194 #ifdef LIBXML_CATALOG_ENABLED
2195
2196 /**
2197 * xmlResolveResourceFromCatalog:
2198 * @URL: the URL for the entity to load
2199 * @ID: the System ID for the entity to load
2200 * @ctxt: the context in which the entity is called or NULL
2201 *
2202 * Resolves the URL and ID against the appropriate catalog.
2203 * This function is used by xmlDefaultExternalEntityLoader and
2204 * xmlNoNetExternalEntityLoader.
2205 *
2206 * Returns a new allocated URL, or NULL.
2207 */
2208 static xmlChar *
xmlResolveResourceFromCatalog(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2209 xmlResolveResourceFromCatalog(const char *URL, const char *ID,
2210 xmlParserCtxtPtr ctxt) {
2211 xmlChar *resource = NULL;
2212 xmlCatalogAllow pref;
2213 int allowLocal = 0;
2214 int allowGlobal = 0;
2215
2216 /*
2217 * If the resource doesn't exists as a file,
2218 * try to load it from the resource pointed in the catalogs
2219 */
2220 pref = xmlCatalogGetDefaults();
2221
2222 if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
2223 ((pref == XML_CATA_ALLOW_ALL) ||
2224 (pref == XML_CATA_ALLOW_DOCUMENT)))
2225 allowLocal = 1;
2226
2227 if (((ctxt == NULL) ||
2228 ((ctxt->options & XML_PARSE_NO_SYS_CATALOG) == 0)) &&
2229 ((pref == XML_CATA_ALLOW_ALL) ||
2230 (pref == XML_CATA_ALLOW_GLOBAL)))
2231 allowGlobal = 1;
2232
2233 if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
2234 /*
2235 * Do a local lookup
2236 */
2237 if (allowLocal) {
2238 resource = xmlCatalogLocalResolve(ctxt->catalogs,
2239 (const xmlChar *)ID,
2240 (const xmlChar *)URL);
2241 }
2242 /*
2243 * Try a global lookup
2244 */
2245 if ((resource == NULL) && (allowGlobal)) {
2246 resource = xmlCatalogResolve((const xmlChar *)ID,
2247 (const xmlChar *)URL);
2248 }
2249 if ((resource == NULL) && (URL != NULL))
2250 resource = xmlStrdup((const xmlChar *) URL);
2251
2252 /*
2253 * TODO: do an URI lookup on the reference
2254 */
2255 if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) {
2256 xmlChar *tmp = NULL;
2257
2258 if (allowLocal) {
2259 tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
2260 }
2261 if ((tmp == NULL) && (allowGlobal)) {
2262 tmp = xmlCatalogResolveURI(resource);
2263 }
2264
2265 if (tmp != NULL) {
2266 xmlFree(resource);
2267 resource = tmp;
2268 }
2269 }
2270 }
2271
2272 return resource;
2273 }
2274
2275 #endif
2276
#ifdef LIBXML_HTTP_ENABLED
/*
 * xmlCheckHTTPInputInternal:
 * @input: a parser input, possibly read through the nano HTTP client
 *
 * Inspect an input whose buffer is read with xmlIOHTTPRead: fail if the
 * HTTP return code is 400 or above, install the encoding announced by
 * the server when the MIME type contains "/xml" or "+xml", and replace
 * the input filename with the redirection target if there is one.
 * Any other input is accepted untouched.
 *
 * Returns XML_ERR_OK, XML_IO_LOAD_ERROR or XML_ERR_NO_MEMORY.
 */
static int
xmlCheckHTTPInputInternal(xmlParserInputPtr input) {
    const char *mimeType;
    const char *location;

    /* Nothing to check unless this is a live HTTP stream. */
    if (input == NULL)
        return(XML_ERR_OK);
    if (input->buf == NULL)
        return(XML_ERR_OK);
    if (input->buf->readcallback != xmlIOHTTPRead)
        return(XML_ERR_OK);
    if (input->buf->context == NULL)
        return(XML_ERR_OK);

    if (xmlNanoHTTPReturnCode(input->buf->context) >= 400) {
        /* the request failed, treat it as a load error */
        return(XML_IO_LOAD_ERROR);
    }

    mimeType = xmlNanoHTTPMimeType(input->buf->context);
    if ((xmlStrstr(BAD_CAST mimeType, BAD_CAST "/xml") != NULL) ||
        (xmlStrstr(BAD_CAST mimeType, BAD_CAST "+xml") != NULL)) {
        const char *charset = xmlNanoHTTPEncoding(input->buf->context);

        if (charset != NULL) {
            xmlCharEncodingHandlerPtr handler;

            /* An unknown encoding is silently ignored. */
            if (xmlOpenCharEncodingHandler(charset, /* output */ 0,
                                           &handler) == 0)
                xmlInputSetEncodingHandler(input, handler);
        }
    }

    location = xmlNanoHTTPRedir(input->buf->context);
    if (location == NULL)
        return(XML_ERR_OK);

    /* Record the redirection target as the new filename. */
    if (input->filename != NULL)
        xmlFree((xmlChar *) input->filename);
    input->filename = xmlMemStrdup(location);
    if (input->filename == NULL)
        return(XML_ERR_NO_MEMORY);

    return(XML_ERR_OK);
}
#endif /* LIBXML_HTTP_ENABLED */
2323
2324 /**
2325 * xmlCheckHTTPInput:
2326 * @ctxt: an XML parser context
2327 * @ret: an XML parser input
2328 *
2329 * DEPRECATED: Internal function, don't use.
2330 *
2331 * Check an input in case it was created from an HTTP stream, in that
2332 * case it will handle encoding and update of the base URL in case of
2333 * redirection. It also checks for HTTP errors in which case the input
2334 * is cleanly freed up and an appropriate error is raised in context
2335 *
2336 * Returns the input or NULL in case of HTTP error.
2337 */
2338 xmlParserInputPtr
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr ret)2339 xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
2340 /* Avoid unused variable warning if features are disabled. */
2341 (void) ctxt;
2342
2343 #ifdef LIBXML_HTTP_ENABLED
2344 {
2345 int code = xmlCheckHTTPInputInternal(ret);
2346
2347 if (code != XML_ERR_OK) {
2348 if (ret->filename != NULL)
2349 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
2350 else
2351 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
2352 xmlFreeInputStream(ret);
2353 return(NULL);
2354 }
2355 }
2356 #endif
2357
2358 return(ret);
2359 }
2360
2361 /**
2362 * xmlNewInputFromUrl:
2363 * @filename: the filename to use as entity
2364 * @flags: XML_INPUT flags
2365 * @out: pointer to new parser input
2366 *
2367 * Create a new input stream based on a file or a URL.
2368 *
2369 * The flag XML_INPUT_UNZIP allows decompression.
2370 *
2371 * The flag XML_INPUT_NETWORK allows network access.
2372 *
2373 * The following resource loaders will be called if they were
2374 * registered (in order of precedence):
2375 *
2376 * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2377 * xmlParserInputBufferCreateFilenameDefault (deprecated)
2378 * - the default loader which will return
2379 * - the result from a matching global input callback set with
2380 * xmlRegisterInputCallbacks (deprecated)
2381 * - a HTTP resource if support is compiled in.
2382 * - a file opened from the filesystem, with automatic detection
2383 * of compressed files if support is compiled in.
2384 *
2385 * Available since 2.14.0.
2386 *
2387 * Returns an xmlParserErrors code.
2388 */
2389 int
xmlNewInputFromUrl(const char * filename,int flags,xmlParserInputPtr * out)2390 xmlNewInputFromUrl(const char *filename, int flags, xmlParserInputPtr *out) {
2391 xmlParserInputBufferPtr buf;
2392 xmlParserInputPtr input;
2393 int code = XML_ERR_OK;
2394
2395 if (out == NULL)
2396 return(XML_ERR_ARGUMENT);
2397 *out = NULL;
2398 if (filename == NULL)
2399 return(XML_ERR_ARGUMENT);
2400
2401 if (xmlParserInputBufferCreateFilenameValue != NULL) {
2402 buf = xmlParserInputBufferCreateFilenameValue(filename,
2403 XML_CHAR_ENCODING_NONE);
2404 if (buf == NULL)
2405 code = XML_IO_ENOENT;
2406 } else {
2407 code = xmlParserInputBufferCreateUrl(filename, XML_CHAR_ENCODING_NONE,
2408 flags, &buf);
2409 }
2410 if (code != XML_ERR_OK)
2411 return(code);
2412
2413 input = xmlNewInputInternal(buf, filename);
2414 if (input == NULL)
2415 return(XML_ERR_NO_MEMORY);
2416
2417 #ifdef LIBXML_HTTP_ENABLED
2418 code = xmlCheckHTTPInputInternal(input);
2419 if (code != XML_ERR_OK) {
2420 xmlFreeInputStream(input);
2421 return(code);
2422 }
2423 #endif
2424
2425 *out = input;
2426 return(XML_ERR_OK);
2427 }
2428
2429 /**
2430 * xmlNewInputFromFile:
2431 * @ctxt: an XML parser context
2432 * @filename: the filename to use as entity
2433 *
2434 * DEPRECATED: Use xmlNewInputFromUrl.
2435 *
2436 * Create a new input stream based on a file or an URL.
2437 *
2438 * Returns the new input stream or NULL in case of error
2439 */
2440 xmlParserInputPtr
xmlNewInputFromFile(xmlParserCtxtPtr ctxt,const char * filename)2441 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2442 xmlParserInputPtr input;
2443 int flags = 0;
2444 int code;
2445
2446 if ((ctxt == NULL) || (filename == NULL))
2447 return(NULL);
2448
2449 if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2450 flags |= XML_INPUT_UNZIP;
2451 if ((ctxt->options & XML_PARSE_NONET) == 0)
2452 flags |= XML_INPUT_NETWORK;
2453
2454 code = xmlNewInputFromUrl(filename, flags, &input);
2455 if (code != XML_ERR_OK) {
2456 xmlCtxtErrIO(ctxt, code, filename);
2457 return(NULL);
2458 }
2459
2460 return(input);
2461 }
2462
2463 /**
2464 * xmlDefaultExternalEntityLoader:
2465 * @URL: the URL for the entity to load
2466 * @ID: the System ID for the entity to load
2467 * @ctxt: the context in which the entity is called or NULL
2468 *
2469 * By default we don't load external entities, yet.
2470 *
2471 * Returns a new allocated xmlParserInputPtr, or NULL.
2472 */
2473 static xmlParserInputPtr
xmlDefaultExternalEntityLoader(const char * url,const char * ID,xmlParserCtxtPtr ctxt)2474 xmlDefaultExternalEntityLoader(const char *url, const char *ID,
2475 xmlParserCtxtPtr ctxt)
2476 {
2477 xmlParserInputPtr input = NULL;
2478 char *resource = NULL;
2479
2480 (void) ID;
2481
2482 if (url == NULL)
2483 return(NULL);
2484
2485 #ifdef LIBXML_CATALOG_ENABLED
2486 resource = (char *) xmlResolveResourceFromCatalog(url, ID, ctxt);
2487 if (resource != NULL)
2488 url = resource;
2489 #endif
2490
2491 if ((ctxt != NULL) &&
2492 (ctxt->options & XML_PARSE_NONET) &&
2493 (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2494 xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2495 } else {
2496 input = xmlNewInputFromFile(ctxt, url);
2497 }
2498
2499 if (resource != NULL)
2500 xmlFree(resource);
2501 return(input);
2502 }
2503
2504 /**
2505 * xmlNoNetExternalEntityLoader:
2506 * @URL: the URL for the entity to load
2507 * @ID: the System ID for the entity to load
2508 * @ctxt: the context in which the entity is called or NULL
2509 *
2510 * DEPRECATED: Use XML_PARSE_NONET.
2511 *
2512 * A specific entity loader disabling network accesses, though still
2513 * allowing local catalog accesses for resolution.
2514 *
2515 * Returns a new allocated xmlParserInputPtr, or NULL.
2516 */
2517 xmlParserInputPtr
xmlNoNetExternalEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2518 xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
2519 xmlParserCtxtPtr ctxt) {
2520 int oldOptions = 0;
2521 xmlParserInputPtr input;
2522
2523 if (ctxt != NULL) {
2524 oldOptions = ctxt->options;
2525 ctxt->options |= XML_PARSE_NONET;
2526 }
2527
2528 input = xmlDefaultExternalEntityLoader(URL, ID, ctxt);
2529
2530 if (ctxt != NULL)
2531 ctxt->options = oldOptions;
2532
2533 return(input);
2534 }
2535
/*
 * Process-wide external entity loader, initially the default loader.
 * It is replaced by xmlSetExternalEntityLoader and read back by
 * xmlGetExternalEntityLoader.
 *
 * This global has to die eventually
 */
static xmlExternalEntityLoader
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2541
2542 /**
2543 * xmlSetExternalEntityLoader:
2544 * @f: the new entity resolver function
2545 *
2546 * DEPRECATED: This is a global setting and not thread-safe. Use
2547 * xmlCtxtSetResourceLoader or similar functions.
2548 *
2549 * Changes the default external entity resolver function for the
2550 * application.
2551 */
2552 void
xmlSetExternalEntityLoader(xmlExternalEntityLoader f)2553 xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2554 xmlCurrentExternalEntityLoader = f;
2555 }
2556
2557 /**
2558 * xmlGetExternalEntityLoader:
2559 *
2560 * DEPRECATED: See xmlSetExternalEntityLoader.
2561 *
2562 * Get the default external entity resolver function for the application
2563 *
2564 * Returns the xmlExternalEntityLoader function pointer
2565 */
2566 xmlExternalEntityLoader
xmlGetExternalEntityLoader(void)2567 xmlGetExternalEntityLoader(void) {
2568 return(xmlCurrentExternalEntityLoader);
2569 }
2570
2571 /**
2572 * xmlCtxtSetResourceLoader:
2573 * @ctxt: parser context
2574 * @loader: callback
2575 * @vctxt: user data
2576 *
2577 * Installs a custom callback to load documents, DTDs or external
2578 * entities.
2579 *
2580 * Available since 2.14.0.
2581 */
2582 void
xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt,xmlResourceLoader loader,void * vctxt)2583 xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt, xmlResourceLoader loader,
2584 void *vctxt) {
2585 if (ctxt == NULL)
2586 return;
2587
2588 ctxt->resourceLoader = loader;
2589 ctxt->resourceCtxt = vctxt;
2590 }
2591
2592 /**
2593 * xmlLoadResource:
2594 * @ctxt: parser context
2595 * @url: the URL for the entity to load
2596 * @publicId: the Public ID for the entity to load
2597 * @type: resource type
2598 *
2599 * Returns the xmlParserInputPtr or NULL in case of error.
2600 */
2601 xmlParserInputPtr
xmlLoadResource(xmlParserCtxtPtr ctxt,const char * url,const char * publicId,xmlResourceType type)2602 xmlLoadResource(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
2603 xmlResourceType type) {
2604 char *canonicFilename;
2605 xmlParserInputPtr ret;
2606
2607 if (url == NULL)
2608 return(NULL);
2609
2610 if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2611 char *resource = NULL;
2612 int flags = 0;
2613 int code;
2614
2615 #ifdef LIBXML_CATALOG_ENABLED
2616 resource = (char *) xmlResolveResourceFromCatalog(url, publicId, ctxt);
2617 if (resource != NULL)
2618 url = resource;
2619 #endif
2620
2621 if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2622 flags |= XML_INPUT_UNZIP;
2623 if ((ctxt->options & XML_PARSE_NONET) == 0)
2624 flags |= XML_INPUT_NETWORK;
2625
2626 code = ctxt->resourceLoader(ctxt->resourceCtxt, url, publicId, type,
2627 flags, &ret);
2628 if (code != XML_ERR_OK) {
2629 xmlCtxtErrIO(ctxt, code, url);
2630 ret = NULL;
2631 }
2632 if (resource != NULL)
2633 xmlFree(resource);
2634 return(ret);
2635 }
2636
2637 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2638 if (canonicFilename == NULL) {
2639 xmlCtxtErrMemory(ctxt);
2640 return(NULL);
2641 }
2642
2643 ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2644 xmlFree(canonicFilename);
2645 return(ret);
2646 }
2647
2648 /**
2649 * xmlLoadExternalEntity:
2650 * @URL: the URL for the entity to load
2651 * @ID: the Public ID for the entity to load
2652 * @ctxt: the context in which the entity is called or NULL
2653 *
2654 * @URL is a filename or URL. If if contains the substring "://",
2655 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2656 * treated as a filesystem path.
2657 *
2658 * @ID is an optional XML public ID, typically from a doctype
2659 * declaration. It is used for catalog lookups.
2660 *
2661 * If catalog lookup is enabled (default is yes) and URL or ID are
2662 * found in system or local XML catalogs, URL is replaced with the
2663 * result. Then the following resource loaders will be called if
2664 * they were registered (in order of precedence):
2665 *
2666 * - the resource loader set with xmlCtxtSetResourceLoader
2667 * - the global external entity loader set with
2668 * xmlSetExternalEntityLoader (without catalog resolution,
2669 * deprecated)
2670 * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2671 * xmlParserInputBufferCreateFilenameDefault (deprecated)
2672 * - the default loader which will return
2673 * - the result from a matching global input callback set with
2674 * xmlRegisterInputCallbacks (deprecated)
2675 * - a HTTP resource if support is compiled in.
2676 * - a file opened from the filesystem, with automatic detection
2677 * of compressed files if support is compiled in.
2678 *
2679 * Returns the xmlParserInputPtr or NULL
2680 */
2681 xmlParserInputPtr
xmlLoadExternalEntity(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2682 xmlLoadExternalEntity(const char *URL, const char *ID,
2683 xmlParserCtxtPtr ctxt) {
2684 return(xmlLoadResource(ctxt, URL, ID, XML_RESOURCE_UNKNOWN));
2685 }
2686
2687 /************************************************************************
2688 * *
2689 * Commodity functions to handle parser contexts *
2690 * *
2691 ************************************************************************/
2692
/**
 * xmlInitSAXParserCtxt:
 * @ctxt:  XML parser context
 * @sax:  SAX handler
 * @userData:  user data
 *
 * Initialize a SAX parser context: allocate the dictionary, the SAX
 * handler copy, the input, node, name and space stacks and the
 * namespace database if they are missing, then reset the parsing
 * state and derive the initial options from the deprecated global
 * defaults.
 *
 * On failure the context may be left partially initialized.
 *
 * Returns 0 in case of success and -1 in case of error
 */

static int
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
                     void *userData)
{
    xmlParserInputPtr input;
    /* Initial capacity shared by the node, name and space stacks. */
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    size_t initialNodeTabSize = 1;
#else
    size_t initialNodeTabSize = 10;
#endif

    if (ctxt == NULL)
        return(-1);

    if (ctxt->dict == NULL)
	ctxt->dict = xmlDictCreate();
    if (ctxt->dict == NULL)
	return(-1);

    if (ctxt->sax == NULL)
	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
    if (ctxt->sax == NULL)
	return(-1);
    if (sax == NULL) {
        /* No handler supplied: install the SAX2 defaults. */
	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
        xmlSAXVersion(ctxt->sax, 2);
        ctxt->userData = ctxt;
    } else {
	if (sax->initialized == XML_SAX2_MAGIC) {
	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
        } else {
            /* Not marked as SAX2: copy only the V1 part, zero the rest. */
	    memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
        }
        /* The context itself serves as user data if none was given. */
        ctxt->userData = userData ? userData : ctxt;
    }

    ctxt->maxatts = 0;
    ctxt->atts = NULL;
    /* Allocate the Input stack */
    if (ctxt->inputTab == NULL) {
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
        size_t initialSize = 1;
#else
        size_t initialSize = 5;
#endif

	ctxt->inputTab = xmlMalloc(initialSize * sizeof(xmlParserInputPtr));
	ctxt->inputMax = initialSize;
    }
    if (ctxt->inputTab == NULL)
	return(-1);
    /* Release any inputs still on the stack. */
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
        xmlFreeInputStream(input);
    }
    ctxt->inputNr = 0;
    ctxt->input = NULL;

    ctxt->version = NULL;
    ctxt->encoding = NULL;
    ctxt->standalone = -1;
    ctxt->hasExternalSubset = 0;
    ctxt->hasPErefs = 0;
    ctxt->html = 0;
    ctxt->instate = XML_PARSER_START;

    /* Allocate the Node stack */
    if (ctxt->nodeTab == NULL) {
	ctxt->nodeTab = xmlMalloc(initialNodeTabSize * sizeof(xmlNodePtr));
	ctxt->nodeMax = initialNodeTabSize;
    }
    if (ctxt->nodeTab == NULL)
	return(-1);
    ctxt->nodeNr = 0;
    ctxt->node = NULL;

    /* Allocate the Name stack */
    if (ctxt->nameTab == NULL) {
	ctxt->nameTab = xmlMalloc(initialNodeTabSize * sizeof(xmlChar *));
	ctxt->nameMax = initialNodeTabSize;
    }
    if (ctxt->nameTab == NULL)
	return(-1);
    ctxt->nameNr = 0;
    ctxt->name = NULL;

    /* Allocate the space stack */
    if (ctxt->spaceTab == NULL) {
	ctxt->spaceTab = xmlMalloc(initialNodeTabSize * sizeof(int));
	ctxt->spaceMax = initialNodeTabSize;
    }
    if (ctxt->spaceTab == NULL)
	return(-1);
    /* The space stack starts with a single entry of -1. */
    ctxt->spaceNr = 1;
    ctxt->spaceTab[0] = -1;
    ctxt->space = &ctxt->spaceTab[0];
    ctxt->myDoc = NULL;
    ctxt->wellFormed = 1;
    ctxt->nsWellFormed = 1;
    ctxt->valid = 1;

    ctxt->options = XML_PARSE_NODICT;

    /*
     * Initialize some parser options from deprecated global variables.
     * Note that the "modern" API taking options arguments or
     * xmlCtxtSetOptions will ignore these defaults. They're only
     * relevant if old API functions like xmlParseFile are used.
     */
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
    if (ctxt->loadsubset) {
        ctxt->options |= XML_PARSE_DTDLOAD;
    }
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
    if (ctxt->validate) {
        ctxt->options |= XML_PARSE_DTDVALID;
    }
    ctxt->pedantic = xmlPedanticParserDefaultValue;
    if (ctxt->pedantic) {
        ctxt->options |= XML_PARSE_PEDANTIC;
    }
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
    if (ctxt->keepBlanks == 0) {
	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
	ctxt->options |= XML_PARSE_NOBLANKS;
    }
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
    if (ctxt->replaceEntities) {
        ctxt->options |= XML_PARSE_NOENT;
    }
    if (xmlGetWarningsDefaultValue == 0)
        ctxt->options |= XML_PARSE_NOWARNING;

    /* Validation errors and warnings are routed through this context. */
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
    ctxt->vctxt.userData = ctxt;
    ctxt->vctxt.error = xmlParserValidityError;
    ctxt->vctxt.warning = xmlParserValidityWarning;

    ctxt->record_info = 0;
    ctxt->checkIndex = 0;
    ctxt->inSubset = 0;
    ctxt->errNo = XML_ERR_OK;
    ctxt->depth = 0;
    ctxt->catalogs = NULL;
    ctxt->sizeentities = 0;
    ctxt->sizeentcopy = 0;
    ctxt->input_id = 1;
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
    xmlInitNodeInfoSeq(&ctxt->node_seq);

    if (ctxt->nsdb == NULL) {
        ctxt->nsdb = xmlParserNsCreate();
        if (ctxt->nsdb == NULL)
            return(-1);
    }

    return(0);
}
2863
2864 /**
2865 * xmlInitParserCtxt:
2866 * @ctxt: an XML parser context
2867 *
2868 * DEPRECATED: Internal function which will be made private in a future
2869 * version.
2870 *
2871 * Initialize a parser context
2872 *
2873 * Returns 0 in case of success and -1 in case of error
2874 */
2875
2876 int
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)2877 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2878 {
2879 return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2880 }
2881
2882 /**
2883 * xmlFreeParserCtxt:
2884 * @ctxt: an XML parser context
2885 *
2886 * Free all the memory used by a parser context. However the parsed
2887 * document in ctxt->myDoc is not freed.
2888 */
2889
2890 void
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)2891 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2892 {
2893 xmlParserInputPtr input;
2894
2895 if (ctxt == NULL) return;
2896
2897 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2898 xmlFreeInputStream(input);
2899 }
2900 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2901 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2902 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2903 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2904 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2905 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2906 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2907 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2908 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2909 #ifdef LIBXML_SAX1_ENABLED
2910 if ((ctxt->sax != NULL) &&
2911 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2912 #else
2913 if (ctxt->sax != NULL)
2914 #endif /* LIBXML_SAX1_ENABLED */
2915 xmlFree(ctxt->sax);
2916 if (ctxt->directory != NULL) xmlFree(ctxt->directory);
2917 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2918 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2919 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2920 if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2921 if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2922 if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2923 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2924 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2925 if (ctxt->attsDefault != NULL)
2926 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2927 if (ctxt->attsSpecial != NULL)
2928 xmlHashFree(ctxt->attsSpecial, NULL);
2929 if (ctxt->freeElems != NULL) {
2930 xmlNodePtr cur, next;
2931
2932 cur = ctxt->freeElems;
2933 while (cur != NULL) {
2934 next = cur->next;
2935 xmlFree(cur);
2936 cur = next;
2937 }
2938 }
2939 if (ctxt->freeAttrs != NULL) {
2940 xmlAttrPtr cur, next;
2941
2942 cur = ctxt->freeAttrs;
2943 while (cur != NULL) {
2944 next = cur->next;
2945 xmlFree(cur);
2946 cur = next;
2947 }
2948 }
2949 /*
2950 * cleanup the error strings
2951 */
2952 if (ctxt->lastError.message != NULL)
2953 xmlFree(ctxt->lastError.message);
2954 if (ctxt->lastError.file != NULL)
2955 xmlFree(ctxt->lastError.file);
2956 if (ctxt->lastError.str1 != NULL)
2957 xmlFree(ctxt->lastError.str1);
2958 if (ctxt->lastError.str2 != NULL)
2959 xmlFree(ctxt->lastError.str2);
2960 if (ctxt->lastError.str3 != NULL)
2961 xmlFree(ctxt->lastError.str3);
2962
2963 #ifdef LIBXML_CATALOG_ENABLED
2964 if (ctxt->catalogs != NULL)
2965 xmlCatalogFreeLocal(ctxt->catalogs);
2966 #endif
2967 xmlFree(ctxt);
2968 }
2969
2970 /**
2971 * xmlNewParserCtxt:
2972 *
2973 * Allocate and initialize a new parser context.
2974 *
2975 * Returns the xmlParserCtxtPtr or NULL
2976 */
2977
2978 xmlParserCtxtPtr
xmlNewParserCtxt(void)2979 xmlNewParserCtxt(void)
2980 {
2981 return(xmlNewSAXParserCtxt(NULL, NULL));
2982 }
2983
2984 /**
2985 * xmlNewSAXParserCtxt:
2986 * @sax: SAX handler
2987 * @userData: user data
2988 *
2989 * Allocate and initialize a new SAX parser context. If userData is NULL,
2990 * the parser context will be passed as user data.
2991 *
2992 * Available since 2.11.0. If you want support older versions,
2993 * it's best to invoke xmlNewParserCtxt and set ctxt->sax with
2994 * struct assignment.
2995 *
2996 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2997 */
2998
2999 xmlParserCtxtPtr
xmlNewSAXParserCtxt(const xmlSAXHandler * sax,void * userData)3000 xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
3001 {
3002 xmlParserCtxtPtr ctxt;
3003
3004 xmlInitParser();
3005
3006 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
3007 if (ctxt == NULL)
3008 return(NULL);
3009 memset(ctxt, 0, sizeof(xmlParserCtxt));
3010 if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
3011 xmlFreeParserCtxt(ctxt);
3012 return(NULL);
3013 }
3014 return(ctxt);
3015 }
3016
3017 /**
3018 * xmlCtxtGetPrivate:
3019 * ctxt: parser context
3020 *
3021 * Available since 2.14.0.
3022 *
3023 * Returns the private application data.
3024 */
3025 void *
xmlCtxtGetPrivate(xmlParserCtxtPtr ctxt)3026 xmlCtxtGetPrivate(xmlParserCtxtPtr ctxt) {
3027 if (ctxt == NULL)
3028 return(NULL);
3029
3030 return(ctxt->_private);
3031 }
3032
3033 /**
3034 * xmlCtxtSetPrivate:
3035 * ctxt: parser context
3036 * priv: private application data
3037 *
3038 * Available since 2.14.0.
3039 *
3040 * Set the private application data.
3041 */
3042 void
xmlCtxtSetPrivate(xmlParserCtxtPtr ctxt,void * priv)3043 xmlCtxtSetPrivate(xmlParserCtxtPtr ctxt, void *priv) {
3044 if (ctxt == NULL)
3045 return;
3046
3047 ctxt->_private = priv;
3048 }
3049
3050 /**
3051 * xmlCtxtGetCatalogs:
3052 * ctxt: parser context
3053 *
3054 * Available since 2.14.0.
3055 *
3056 * Returns the local catalogs.
3057 */
3058 void *
xmlCtxtGetCatalogs(xmlParserCtxtPtr ctxt)3059 xmlCtxtGetCatalogs(xmlParserCtxtPtr ctxt) {
3060 if (ctxt == NULL)
3061 return(NULL);
3062
3063 return(ctxt->catalogs);
3064 }
3065
3066 /**
3067 * xmlCtxtSetCatalogs:
3068 * ctxt: parser context
3069 * catalogs: catalogs pointer
3070 *
3071 * Available since 2.14.0.
3072 *
3073 * Set the local catalogs.
3074 */
3075 void
xmlCtxtSetCatalogs(xmlParserCtxtPtr ctxt,void * catalogs)3076 xmlCtxtSetCatalogs(xmlParserCtxtPtr ctxt, void *catalogs) {
3077 if (ctxt == NULL)
3078 return;
3079
3080 ctxt->catalogs = catalogs;
3081 }
3082
3083 /**
3084 * xmlCtxtGetDict:
3085 * ctxt: parser context
3086 *
3087 * Available since 2.14.0.
3088 *
3089 * Returns the dictionary.
3090 */
3091 xmlDictPtr
xmlCtxtGetDict(xmlParserCtxtPtr ctxt)3092 xmlCtxtGetDict(xmlParserCtxtPtr ctxt) {
3093 if (ctxt == NULL)
3094 return(NULL);
3095
3096 return(ctxt->dict);
3097 }
3098
3099 /**
3100 * xmlCtxtSetDict:
3101 * ctxt: parser context
3102 * dict: dictionary
3103 *
3104 * Available since 2.14.0.
3105 *
3106 * Set the dictionary. This should only be done immediately after
3107 * creating a parser context.
3108 */
3109 void
xmlCtxtSetDict(xmlParserCtxtPtr ctxt,xmlDictPtr dict)3110 xmlCtxtSetDict(xmlParserCtxtPtr ctxt, xmlDictPtr dict) {
3111 if (ctxt == NULL)
3112 return;
3113
3114 if (ctxt->dict != NULL)
3115 xmlDictFree(ctxt->dict);
3116
3117 xmlDictReference(dict);
3118 ctxt->dict = dict;
3119 }
3120
3121 /************************************************************************
3122 * *
3123 * Handling of node information *
3124 * *
3125 ************************************************************************/
3126
3127 /**
3128 * xmlClearParserCtxt:
3129 * @ctxt: an XML parser context
3130 *
3131 * Clear (release owned resources) and reinitialize a parser context
3132 */
3133
3134 void
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)3135 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
3136 {
3137 if (ctxt==NULL)
3138 return;
3139 xmlClearNodeInfoSeq(&ctxt->node_seq);
3140 xmlCtxtReset(ctxt);
3141 }
3142
3143
3144 /**
3145 * xmlParserFindNodeInfo:
3146 * @ctx: an XML parser context
3147 * @node: an XML node within the tree
3148 *
3149 * DEPRECATED: Don't use.
3150 *
3151 * Find the parser node info struct for a given node
3152 *
3153 * Returns an xmlParserNodeInfo block pointer or NULL
3154 */
3155 const xmlParserNodeInfo *
xmlParserFindNodeInfo(xmlParserCtxtPtr ctx,xmlNodePtr node)3156 xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
3157 {
3158 unsigned long pos;
3159
3160 if ((ctx == NULL) || (node == NULL))
3161 return (NULL);
3162 /* Find position where node should be at */
3163 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3164 if (pos < ctx->node_seq.length
3165 && ctx->node_seq.buffer[pos].node == node)
3166 return &ctx->node_seq.buffer[pos];
3167 else
3168 return NULL;
3169 }
3170
3171
3172 /**
3173 * xmlInitNodeInfoSeq:
3174 * @seq: a node info sequence pointer
3175 *
3176 * DEPRECATED: Don't use.
3177 *
3178 * -- Initialize (set to initial state) node info sequence
3179 */
3180 void
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)3181 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3182 {
3183 if (seq == NULL)
3184 return;
3185 seq->length = 0;
3186 seq->maximum = 0;
3187 seq->buffer = NULL;
3188 }
3189
3190 /**
3191 * xmlClearNodeInfoSeq:
3192 * @seq: a node info sequence pointer
3193 *
3194 * DEPRECATED: Don't use.
3195 *
3196 * -- Clear (release memory and reinitialize) node
3197 * info sequence
3198 */
3199 void
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)3200 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3201 {
3202 if (seq == NULL)
3203 return;
3204 if (seq->buffer != NULL)
3205 xmlFree(seq->buffer);
3206 xmlInitNodeInfoSeq(seq);
3207 }
3208
3209 /**
3210 * xmlParserFindNodeInfoIndex:
3211 * @seq: a node info sequence pointer
3212 * @node: an XML node pointer
3213 *
3214 * DEPRECATED: Don't use.
3215 *
3216 * xmlParserFindNodeInfoIndex : Find the index that the info record for
3217 * the given node is or should be at in a sorted sequence
3218 *
3219 * Returns a long indicating the position of the record
3220 */
3221 unsigned long
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,xmlNodePtr node)3222 xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
3223 xmlNodePtr node)
3224 {
3225 unsigned long upper, lower, middle;
3226 int found = 0;
3227
3228 if ((seq == NULL) || (node == NULL))
3229 return ((unsigned long) -1);
3230
3231 /* Do a binary search for the key */
3232 lower = 1;
3233 upper = seq->length;
3234 middle = 0;
3235 while (lower <= upper && !found) {
3236 middle = lower + (upper - lower) / 2;
3237 if (node == seq->buffer[middle - 1].node)
3238 found = 1;
3239 else if (node < seq->buffer[middle - 1].node)
3240 upper = middle - 1;
3241 else
3242 lower = middle + 1;
3243 }
3244
3245 /* Return position */
3246 if (middle == 0 || seq->buffer[middle - 1].node < node)
3247 return middle;
3248 else
3249 return middle - 1;
3250 }
3251
3252
3253 /**
3254 * xmlParserAddNodeInfo:
3255 * @ctxt: an XML parser context
3256 * @info: a node info sequence pointer
3257 *
3258 * DEPRECATED: Don't use.
3259 *
3260 * Insert node info record into the sorted sequence
3261 */
3262 void
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,xmlParserNodeInfoPtr info)3263 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
3264 xmlParserNodeInfoPtr info)
3265 {
3266 unsigned long pos;
3267
3268 if ((ctxt == NULL) || (info == NULL)) return;
3269
3270 /* Find pos and check to see if node is already in the sequence */
3271 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
3272 info->node);
3273
3274 if ((pos < ctxt->node_seq.length) &&
3275 (ctxt->node_seq.buffer != NULL) &&
3276 (ctxt->node_seq.buffer[pos].node == info->node)) {
3277 ctxt->node_seq.buffer[pos] = *info;
3278 }
3279
3280 /* Otherwise, we need to add new node to buffer */
3281 else {
3282 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
3283 (ctxt->node_seq.buffer == NULL)) {
3284 xmlParserNodeInfo *tmp_buffer;
3285 unsigned int byte_size;
3286
3287 if (ctxt->node_seq.maximum == 0)
3288 ctxt->node_seq.maximum = 2;
3289 byte_size = (sizeof(*ctxt->node_seq.buffer) *
3290 (2 * ctxt->node_seq.maximum));
3291
3292 if (ctxt->node_seq.buffer == NULL)
3293 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
3294 else
3295 tmp_buffer =
3296 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
3297 byte_size);
3298
3299 if (tmp_buffer == NULL) {
3300 xmlCtxtErrMemory(ctxt);
3301 return;
3302 }
3303 ctxt->node_seq.buffer = tmp_buffer;
3304 ctxt->node_seq.maximum *= 2;
3305 }
3306
3307 /* If position is not at end, move elements out of the way */
3308 if (pos != ctxt->node_seq.length) {
3309 unsigned long i;
3310
3311 for (i = ctxt->node_seq.length; i > pos; i--)
3312 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
3313 }
3314
3315 /* Copy element and increase length */
3316 ctxt->node_seq.buffer[pos] = *info;
3317 ctxt->node_seq.length++;
3318 }
3319 }
3320
3321 /************************************************************************
3322 * *
3323 * Defaults settings *
3324 * *
3325 ************************************************************************/
3326 /**
3327 * xmlPedanticParserDefault:
3328 * @val: int 0 or 1
3329 *
3330 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
3331 *
3332 * Set and return the previous value for enabling pedantic warnings.
3333 *
3334 * Returns the last value for 0 for no substitution, 1 for substitution.
3335 */
3336
3337 int
xmlPedanticParserDefault(int val)3338 xmlPedanticParserDefault(int val) {
3339 int old = xmlPedanticParserDefaultValue;
3340
3341 xmlPedanticParserDefaultValue = val;
3342 return(old);
3343 }
3344
3345 /**
3346 * xmlLineNumbersDefault:
3347 * @val: int 0 or 1
3348 *
3349 * DEPRECATED: The modern options API always enables line numbers.
3350 *
3351 * Set and return the previous value for enabling line numbers in elements
3352 * contents. This may break on old application and is turned off by default.
3353 *
3354 * Returns the last value for 0 for no substitution, 1 for substitution.
3355 */
3356
3357 int
xmlLineNumbersDefault(int val)3358 xmlLineNumbersDefault(int val) {
3359 int old = xmlLineNumbersDefaultValue;
3360
3361 xmlLineNumbersDefaultValue = val;
3362 return(old);
3363 }
3364
3365 /**
3366 * xmlSubstituteEntitiesDefault:
3367 * @val: int 0 or 1
3368 *
3369 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
3370 *
3371 * Set and return the previous value for default entity support.
3372 * Initially the parser always keep entity references instead of substituting
3373 * entity values in the output. This function has to be used to change the
3374 * default parser behavior
3375 * SAX::substituteEntities() has to be used for changing that on a file by
3376 * file basis.
3377 *
3378 * Returns the last value for 0 for no substitution, 1 for substitution.
3379 */
3380
3381 int
xmlSubstituteEntitiesDefault(int val)3382 xmlSubstituteEntitiesDefault(int val) {
3383 int old = xmlSubstituteEntitiesDefaultValue;
3384
3385 xmlSubstituteEntitiesDefaultValue = val;
3386 return(old);
3387 }
3388
3389 /**
3390 * xmlKeepBlanksDefault:
3391 * @val: int 0 or 1
3392 *
3393 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
3394 *
3395 * Set and return the previous value for default blanks text nodes support.
3396 * The 1.x version of the parser used an heuristic to try to detect
3397 * ignorable white spaces. As a result the SAX callback was generating
3398 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
3399 * using the DOM output text nodes containing those blanks were not generated.
3400 * The 2.x and later version will switch to the XML standard way and
3401 * ignorableWhitespace() are only generated when running the parser in
3402 * validating mode and when the current element doesn't allow CDATA or
3403 * mixed content.
3404 * This function is provided as a way to force the standard behavior
3405 * on 1.X libs and to switch back to the old mode for compatibility when
3406 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
3407 * by using xmlIsBlankNode() commodity function to detect the "empty"
3408 * nodes generated.
3409 * This value also affect autogeneration of indentation when saving code
3410 * if blanks sections are kept, indentation is not generated.
3411 *
3412 * Returns the last value for 0 for no substitution, 1 for substitution.
3413 */
3414
3415 int
xmlKeepBlanksDefault(int val)3416 xmlKeepBlanksDefault(int val) {
3417 int old = xmlKeepBlanksDefaultValue;
3418
3419 xmlKeepBlanksDefaultValue = val;
3420 #ifdef LIBXML_OUTPUT_ENABLED
3421 if (!val)
3422 xmlIndentTreeOutput = 1;
3423 #endif
3424 return(old);
3425 }
3426
3427