1 /*
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * [email protected]
8 */
9
10 #define IN_LIBXML
11 #include "libxml.h"
12
13 #if defined(_WIN32)
14 #define XML_DIR_SEP '\\'
15 #else
16 #define XML_DIR_SEP '/'
17 #endif
18
19 #include <string.h>
20 #include <ctype.h>
21 #include <stdlib.h>
22
23 #include <libxml/xmlmemory.h>
24 #include <libxml/tree.h>
25 #include <libxml/parser.h>
26 #include <libxml/parserInternals.h>
27 #include <libxml/entities.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/encoding.h>
30 #include <libxml/xmlIO.h>
31 #include <libxml/uri.h>
32 #include <libxml/dict.h>
33 #include <libxml/xmlsave.h>
34 #ifdef LIBXML_CATALOG_ENABLED
35 #include <libxml/catalog.h>
36 #endif
37 #include <libxml/chvalid.h>
38 #include <libxml/nanohttp.h>
39
40 #define CUR(ctxt) ctxt->input->cur
41 #define END(ctxt) ctxt->input->end
42
43 #include "private/buf.h"
44 #include "private/enc.h"
45 #include "private/error.h"
46 #include "private/io.h"
47 #include "private/parser.h"
48
49 #define XML_MAX_ERRORS 100
50
51 /*
52 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
53 * factor of serialized output after entity expansion.
54 */
55 #define XML_MAX_AMPLIFICATION_DEFAULT 5
56
57 /*
58 * Various global defaults for parsing
59 */
60
61 /**
62 * xmlCheckVersion:
63 * @version: the include version number
64 *
65 * check the compiled lib version against the include one.
66 */
67 void
xmlCheckVersion(int version)68 xmlCheckVersion(int version) {
69 int myversion = LIBXML_VERSION;
70
71 xmlInitParser();
72
73 if ((myversion / 10000) != (version / 10000)) {
74 fprintf(stderr,
75 "Fatal: program compiled against libxml %d using libxml %d\n",
76 (version / 10000), (myversion / 10000));
77 } else if ((myversion / 100) < (version / 100)) {
78 fprintf(stderr,
79 "Warning: program compiled against libxml %d using older %d\n",
80 (version / 100), (myversion / 100));
81 }
82 }
83
84
85 /************************************************************************
86 * *
87 * Some factorized error routines *
88 * *
89 ************************************************************************/
90
91
92 /**
93 * xmlCtxtSetErrorHandler:
94 * @ctxt: an XML parser context
95 * @handler: error handler
96 * @data: data for error handler
97 *
98 * Register a callback function that will be called on errors and
99 * warnings. If handler is NULL, the error handler will be deactivated.
100 *
101 * This is the recommended way to collect errors from the parser and
102 * takes precedence over all other error reporting mechanisms.
103 * These are (in order of precedence):
104 *
105 * - per-context structured handler (xmlCtxtSetErrorHandler)
106 * - per-context structured "serror" SAX handler
107 * - global structured handler (xmlSetStructuredErrorFunc)
108 * - per-context generic "error" and "warning" SAX handlers
109 * - global generic handler (xmlSetGenericErrorFunc)
110 * - print to stderr
111 *
112 * Available since 2.13.0.
113 */
114 void
xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt,xmlStructuredErrorFunc handler,void * data)115 xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
116 void *data)
117 {
118 if (ctxt == NULL)
119 return;
120 ctxt->errorHandler = handler;
121 ctxt->errorCtxt = data;
122 }
123
124 /**
125 * xmlCtxtErrMemory:
126 * @ctxt: an XML parser context
127 * @domain: domain
128 *
129 * Handle an out-of-memory error
130 */
131 void
xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)132 xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
133 {
134 xmlStructuredErrorFunc schannel = NULL;
135 xmlGenericErrorFunc channel = NULL;
136 void *data;
137
138 ctxt->errNo = XML_ERR_NO_MEMORY;
139 ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
140 ctxt->wellFormed = 0;
141 ctxt->disableSAX = 2;
142
143 if (ctxt->errorHandler) {
144 schannel = ctxt->errorHandler;
145 data = ctxt->errorCtxt;
146 } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
147 (ctxt->sax->serror != NULL)) {
148 schannel = ctxt->sax->serror;
149 data = ctxt->userData;
150 } else {
151 channel = ctxt->sax->error;
152 data = ctxt->userData;
153 }
154
155 xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
156 &ctxt->lastError);
157 }
158
159 /**
160 * xmlCtxtErrIO:
161 * @ctxt: parser context
162 * @code: xmlParserErrors code
163 * @uri: filename or URI (optional)
164 *
165 * If filename is empty, use the one from context input if available.
166 *
167 * Report an IO error to the parser context.
168 */
169 void
xmlCtxtErrIO(xmlParserCtxtPtr ctxt,int code,const char * uri)170 xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
171 {
172 const char *errstr, *msg, *str1, *str2;
173 xmlErrorLevel level;
174
175 if (ctxt == NULL)
176 return;
177
178 /*
179 * Don't report a well-formedness error if an external entity could
180 * not be found. We assume that inputNr is zero for the document
181 * entity which is somewhat fragile.
182 */
183 if ((ctxt->inputNr > 0) &&
184 ((code == XML_IO_ENOENT) ||
185 (code == XML_IO_NETWORK_ATTEMPT) ||
186 (code == XML_IO_UNKNOWN))) {
187 if (ctxt->validate == 0)
188 level = XML_ERR_WARNING;
189 else
190 level = XML_ERR_ERROR;
191 } else {
192 level = XML_ERR_FATAL;
193 }
194
195 errstr = xmlErrString(code);
196
197 if (uri == NULL) {
198 msg = "%s\n";
199 str1 = errstr;
200 str2 = NULL;
201 } else {
202 msg = "failed to load \"%s\": %s\n";
203 str1 = uri;
204 str2 = errstr;
205 }
206
207 xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
208 (const xmlChar *) uri, NULL, NULL, 0,
209 msg, str1, str2);
210 }
211
212 void
xmlCtxtVErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,va_list ap)213 xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
214 xmlParserErrors code, xmlErrorLevel level,
215 const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
216 int int1, const char *msg, va_list ap)
217 {
218 xmlStructuredErrorFunc schannel = NULL;
219 xmlGenericErrorFunc channel = NULL;
220 void *data = NULL;
221 const char *file = NULL;
222 int line = 0;
223 int col = 0;
224 int res;
225
226 if (code == XML_ERR_NO_MEMORY) {
227 xmlCtxtErrMemory(ctxt);
228 return;
229 }
230
231 if (PARSER_STOPPED(ctxt))
232 return;
233
234 if (level == XML_ERR_WARNING) {
235 if (ctxt->nbWarnings >= XML_MAX_ERRORS)
236 return;
237 ctxt->nbWarnings += 1;
238 } else {
239 if (ctxt->nbErrors >= XML_MAX_ERRORS)
240 return;
241 ctxt->nbErrors += 1;
242 }
243
244 if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
245 ((level != XML_ERR_WARNING) ||
246 ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
247 if (ctxt->errorHandler) {
248 schannel = ctxt->errorHandler;
249 data = ctxt->errorCtxt;
250 } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
251 (ctxt->sax->serror != NULL)) {
252 schannel = ctxt->sax->serror;
253 data = ctxt->userData;
254 } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
255 if (level == XML_ERR_WARNING)
256 channel = ctxt->vctxt.warning;
257 else
258 channel = ctxt->vctxt.error;
259 data = ctxt->vctxt.userData;
260 } else {
261 if (level == XML_ERR_WARNING)
262 channel = ctxt->sax->warning;
263 else
264 channel = ctxt->sax->error;
265 data = ctxt->userData;
266 }
267 }
268
269 if (ctxt->input != NULL) {
270 xmlParserInputPtr input = ctxt->input;
271
272 if ((input->filename == NULL) &&
273 (ctxt->inputNr > 1)) {
274 input = ctxt->inputTab[ctxt->inputNr - 2];
275 }
276 file = input->filename;
277 line = input->line;
278 col = input->col;
279 }
280
281 res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
282 level, file, line, (const char *) str1,
283 (const char *) str2, (const char *) str3, int1, col,
284 msg, ap);
285
286 if (res < 0) {
287 xmlCtxtErrMemory(ctxt);
288 return;
289 }
290
291 if (level >= XML_ERR_ERROR)
292 ctxt->errNo = code;
293 if (level == XML_ERR_FATAL) {
294 ctxt->wellFormed = 0;
295 if (ctxt->recovery == 0)
296 ctxt->disableSAX = 1;
297 }
298
299 return;
300 }
301
302 void
xmlCtxtErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,...)303 xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
304 xmlParserErrors code, xmlErrorLevel level,
305 const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
306 int int1, const char *msg, ...)
307 {
308 va_list ap;
309
310 va_start(ap, msg);
311 xmlCtxtVErr(ctxt, node, domain, code, level,
312 str1, str2, str3, int1, msg, ap);
313 va_end(ap);
314 }
315
316 /**
317 * xmlFatalErr:
318 * @ctxt: an XML parser context
319 * @error: the error number
320 * @info: extra information string
321 *
322 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
323 */
324 void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors code,const char * info)325 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
326 {
327 const char *errmsg;
328 xmlErrorLevel level;
329
330 if (code == XML_ERR_UNSUPPORTED_ENCODING)
331 level = XML_ERR_WARNING;
332 else
333 level = XML_ERR_FATAL;
334
335 errmsg = xmlErrString(code);
336
337 if (info == NULL) {
338 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
339 NULL, NULL, NULL, 0, "%s\n", errmsg);
340 } else {
341 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
342 (const xmlChar *) info, NULL, NULL, 0,
343 "%s: %s\n", errmsg, info);
344 }
345 }
346
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);

    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
360
361 /************************************************************************
362 * *
363 * Input handling functions for progressive parsing *
364 * *
365 ************************************************************************/
366
367 /* we need to keep enough input to show errors in context */
368 #define LINE_LEN 80
369
370 /**
371 * xmlHaltParser:
372 * @ctxt: an XML parser context
373 *
374 * Blocks further parser processing don't override error
375 * for internal use
376 */
377 void
xmlHaltParser(xmlParserCtxtPtr ctxt)378 xmlHaltParser(xmlParserCtxtPtr ctxt) {
379 if (ctxt == NULL)
380 return;
381 ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
382 ctxt->disableSAX = 2;
383 }
384
385 /**
386 * xmlParserInputRead:
387 * @in: an XML parser input
388 * @len: an indicative size for the lookahead
389 *
390 * DEPRECATED: This function was internal and is deprecated.
391 *
392 * Returns -1 as this is an error to use it.
393 */
394 int
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED,int len ATTRIBUTE_UNUSED)395 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
396 return(-1);
397 }
398
399 /**
400 * xmlParserGrow:
401 * @ctxt: an XML parser context
402 *
403 * Grow the input buffer.
404 *
405 * Returns the number of bytes read or -1 in case of error.
406 */
407 int
xmlParserGrow(xmlParserCtxtPtr ctxt)408 xmlParserGrow(xmlParserCtxtPtr ctxt) {
409 xmlParserInputPtr in = ctxt->input;
410 xmlParserInputBufferPtr buf = in->buf;
411 size_t curEnd = in->end - in->cur;
412 size_t curBase = in->cur - in->base;
413 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
414 XML_MAX_HUGE_LENGTH :
415 XML_MAX_LOOKUP_LIMIT;
416 int ret;
417
418 if (buf == NULL)
419 return(0);
420 /* Don't grow push parser buffer. */
421 if (PARSER_PROGRESSIVE(ctxt))
422 return(0);
423 /* Don't grow memory buffers. */
424 if ((buf->encoder == NULL) && (buf->readcallback == NULL))
425 return(0);
426 if (buf->error != 0)
427 return(-1);
428
429 if (curBase > maxLength) {
430 xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
431 "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
432 xmlHaltParser(ctxt);
433 return(-1);
434 }
435
436 if (curEnd >= INPUT_CHUNK)
437 return(0);
438
439 ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
440 xmlBufUpdateInput(buf->buffer, in, curBase);
441
442 if (ret < 0) {
443 xmlCtxtErrIO(ctxt, buf->error, NULL);
444 }
445
446 return(ret);
447 }
448
449 /**
450 * xmlParserInputGrow:
451 * @in: an XML parser input
452 * @len: an indicative size for the lookahead
453 *
454 * DEPRECATED: Don't use.
455 *
456 * This function increase the input for the parser. It tries to
457 * preserve pointers to the input buffer, and keep already read data
458 *
459 * Returns the amount of char read, or -1 in case of error, 0 indicate the
460 * end of this entity
461 */
462 int
xmlParserInputGrow(xmlParserInputPtr in,int len)463 xmlParserInputGrow(xmlParserInputPtr in, int len) {
464 int ret;
465 size_t indx;
466
467 if ((in == NULL) || (len < 0)) return(-1);
468 if (in->buf == NULL) return(-1);
469 if (in->base == NULL) return(-1);
470 if (in->cur == NULL) return(-1);
471 if (in->buf->buffer == NULL) return(-1);
472
473 /* Don't grow memory buffers. */
474 if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
475 return(0);
476
477 indx = in->cur - in->base;
478 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
479 return(0);
480 }
481 ret = xmlParserInputBufferGrow(in->buf, len);
482
483 in->base = xmlBufContent(in->buf->buffer);
484 if (in->base == NULL) {
485 in->base = BAD_CAST "";
486 in->cur = in->base;
487 in->end = in->base;
488 return(-1);
489 }
490 in->cur = in->base + indx;
491 in->end = xmlBufEnd(in->buf->buffer);
492
493 return(ret);
494 }
495
496 /**
497 * xmlParserShrink:
498 * @ctxt: an XML parser context
499 *
500 * Shrink the input buffer.
501 */
502 void
xmlParserShrink(xmlParserCtxtPtr ctxt)503 xmlParserShrink(xmlParserCtxtPtr ctxt) {
504 xmlParserInputPtr in = ctxt->input;
505 xmlParserInputBufferPtr buf = in->buf;
506 size_t used;
507
508 if (buf == NULL)
509 return;
510 /* Don't shrink pull parser memory buffers. */
511 if ((!PARSER_PROGRESSIVE(ctxt)) &&
512 (buf->encoder == NULL) &&
513 (buf->readcallback == NULL))
514 return;
515
516 used = in->cur - in->base;
517 /*
518 * Do not shrink on large buffers whose only a tiny fraction
519 * was consumed
520 */
521 if (used > INPUT_CHUNK) {
522 size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
523
524 if (res > 0) {
525 used -= res;
526 if ((res > ULONG_MAX) ||
527 (in->consumed > ULONG_MAX - (unsigned long)res))
528 in->consumed = ULONG_MAX;
529 else
530 in->consumed += res;
531 }
532 }
533
534 xmlBufUpdateInput(buf->buffer, in, used);
535 }
536
537 /**
538 * xmlParserInputShrink:
539 * @in: an XML parser input
540 *
541 * DEPRECATED: Don't use.
542 *
543 * This function removes used input for the parser.
544 */
545 void
xmlParserInputShrink(xmlParserInputPtr in)546 xmlParserInputShrink(xmlParserInputPtr in) {
547 size_t used;
548 size_t ret;
549
550 if (in == NULL) return;
551 if (in->buf == NULL) return;
552 if (in->base == NULL) return;
553 if (in->cur == NULL) return;
554 if (in->buf->buffer == NULL) return;
555
556 used = in->cur - in->base;
557 /*
558 * Do not shrink on large buffers whose only a tiny fraction
559 * was consumed
560 */
561 if (used > INPUT_CHUNK) {
562 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
563 if (ret > 0) {
564 used -= ret;
565 if ((ret > ULONG_MAX) ||
566 (in->consumed > ULONG_MAX - (unsigned long)ret))
567 in->consumed = ULONG_MAX;
568 else
569 in->consumed += ret;
570 }
571 }
572
573 if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
574 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
575 }
576
577 in->base = xmlBufContent(in->buf->buffer);
578 if (in->base == NULL) {
579 /* TODO: raise error */
580 in->base = BAD_CAST "";
581 in->cur = in->base;
582 in->end = in->base;
583 return;
584 }
585 in->cur = in->base + used;
586 in->end = xmlBufEnd(in->buf->buffer);
587 }
588
589 /************************************************************************
590 * *
591 * UTF8 character input and related functions *
592 * *
593 ************************************************************************/
594
595 /**
596 * xmlNextChar:
597 * @ctxt: the XML parser context
598 *
599 * DEPRECATED: Internal function, do not use.
600 *
601 * Skip to the next char input char.
602 */
603
604 void
xmlNextChar(xmlParserCtxtPtr ctxt)605 xmlNextChar(xmlParserCtxtPtr ctxt)
606 {
607 const unsigned char *cur;
608 size_t avail;
609 int c;
610
611 if ((ctxt == NULL) || (ctxt->input == NULL))
612 return;
613
614 avail = ctxt->input->end - ctxt->input->cur;
615
616 if (avail < INPUT_CHUNK) {
617 xmlParserGrow(ctxt);
618 if (ctxt->input->cur >= ctxt->input->end)
619 return;
620 avail = ctxt->input->end - ctxt->input->cur;
621 }
622
623 cur = ctxt->input->cur;
624 c = *cur;
625
626 if (c < 0x80) {
627 if (c == '\n') {
628 ctxt->input->cur++;
629 ctxt->input->line++;
630 ctxt->input->col = 1;
631 } else if (c == '\r') {
632 /*
633 * 2.11 End-of-Line Handling
634 * the literal two-character sequence "#xD#xA" or a standalone
635 * literal #xD, an XML processor must pass to the application
636 * the single character #xA.
637 */
638 ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
639 ctxt->input->line++;
640 ctxt->input->col = 1;
641 return;
642 } else {
643 ctxt->input->cur++;
644 ctxt->input->col++;
645 }
646 } else {
647 ctxt->input->col++;
648
649 if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
650 goto encoding_error;
651
652 if (c < 0xe0) {
653 /* 2-byte code */
654 if (c < 0xc2)
655 goto encoding_error;
656 ctxt->input->cur += 2;
657 } else {
658 unsigned int val = (c << 8) | cur[1];
659
660 if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
661 goto encoding_error;
662
663 if (c < 0xf0) {
664 /* 3-byte code */
665 if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
666 goto encoding_error;
667 ctxt->input->cur += 3;
668 } else {
669 if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
670 goto encoding_error;
671
672 /* 4-byte code */
673 if ((val < 0xf090) || (val >= 0xf490))
674 goto encoding_error;
675 ctxt->input->cur += 4;
676 }
677 }
678 }
679
680 return;
681
682 encoding_error:
683 /* Only report the first error */
684 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
685 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
686 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
687 }
688 ctxt->input->cur++;
689 return;
690 }
691
692 /**
693 * xmlCurrentChar:
694 * @ctxt: the XML parser context
695 * @len: pointer to the length of the char read
696 *
697 * DEPRECATED: Internal function, do not use.
698 *
699 * The current char value, if using UTF-8 this may actually span multiple
700 * bytes in the input buffer. Implement the end of line normalization:
701 * 2.11 End-of-Line Handling
702 * Wherever an external parsed entity or the literal entity value
703 * of an internal parsed entity contains either the literal two-character
704 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
705 * must pass to the application the single character #xA.
706 * This behavior can conveniently be produced by normalizing all
707 * line breaks to #xA on input, before parsing.)
708 *
709 * Returns the current char value and its length
710 */
711
712 int
xmlCurrentChar(xmlParserCtxtPtr ctxt,int * len)713 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
714 const unsigned char *cur;
715 size_t avail;
716 int c;
717
718 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
719
720 avail = ctxt->input->end - ctxt->input->cur;
721
722 if (avail < INPUT_CHUNK) {
723 xmlParserGrow(ctxt);
724 avail = ctxt->input->end - ctxt->input->cur;
725 }
726
727 cur = ctxt->input->cur;
728 c = *cur;
729
730 if (c < 0x80) {
731 /* 1-byte code */
732 if (c < 0x20) {
733 /*
734 * 2.11 End-of-Line Handling
735 * the literal two-character sequence "#xD#xA" or a standalone
736 * literal #xD, an XML processor must pass to the application
737 * the single character #xA.
738 */
739 if (c == '\r') {
740 /*
741 * TODO: This function shouldn't change the 'cur' pointer
742 * as side effect, but the NEXTL macro in parser.c relies
743 * on this behavior when incrementing line numbers.
744 */
745 if (cur[1] == '\n')
746 ctxt->input->cur++;
747 *len = 1;
748 c = '\n';
749 } else if (c == 0) {
750 if (ctxt->input->cur >= ctxt->input->end) {
751 *len = 0;
752 } else {
753 *len = 1;
754 /*
755 * TODO: Null bytes should be handled by callers,
756 * but this can be tricky.
757 */
758 xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
759 "Char 0x0 out of allowed range\n");
760 }
761 } else {
762 *len = 1;
763 }
764 } else {
765 *len = 1;
766 }
767
768 return(c);
769 } else {
770 int val;
771
772 if (avail < 2)
773 goto incomplete_sequence;
774 if ((cur[1] & 0xc0) != 0x80)
775 goto encoding_error;
776
777 if (c < 0xe0) {
778 /* 2-byte code */
779 if (c < 0xc2)
780 goto encoding_error;
781 val = (c & 0x1f) << 6;
782 val |= cur[1] & 0x3f;
783 *len = 2;
784 } else {
785 if (avail < 3)
786 goto incomplete_sequence;
787 if ((cur[2] & 0xc0) != 0x80)
788 goto encoding_error;
789
790 if (c < 0xf0) {
791 /* 3-byte code */
792 val = (c & 0xf) << 12;
793 val |= (cur[1] & 0x3f) << 6;
794 val |= cur[2] & 0x3f;
795 if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
796 goto encoding_error;
797 *len = 3;
798 } else {
799 if (avail < 4)
800 goto incomplete_sequence;
801 if ((cur[3] & 0xc0) != 0x80)
802 goto encoding_error;
803
804 /* 4-byte code */
805 val = (c & 0x0f) << 18;
806 val |= (cur[1] & 0x3f) << 12;
807 val |= (cur[2] & 0x3f) << 6;
808 val |= cur[3] & 0x3f;
809 if ((val < 0x10000) || (val >= 0x110000))
810 goto encoding_error;
811 *len = 4;
812 }
813 }
814
815 return(val);
816 }
817
818 encoding_error:
819 /* Only report the first error */
820 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
821 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
822 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
823 }
824 *len = 1;
825 return(0xFFFD); /* U+FFFD Replacement Character */
826
827 incomplete_sequence:
828 /*
829 * An encoding problem may arise from a truncated input buffer
830 * splitting a character in the middle. In that case do not raise
831 * an error but return 0. This should only happen when push parsing
832 * char data.
833 */
834 *len = 0;
835 return(0);
836 }
837
838 /**
839 * xmlStringCurrentChar:
840 * @ctxt: the XML parser context
841 * @cur: pointer to the beginning of the char
842 * @len: pointer to the length of the char read
843 *
844 * DEPRECATED: Internal function, do not use.
845 *
846 * The current char value, if using UTF-8 this may actually span multiple
847 * bytes in the input buffer.
848 *
849 * Returns the current char value and its length
850 */
851
852 int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,const xmlChar * cur,int * len)853 xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
854 const xmlChar *cur, int *len) {
855 int c;
856
857 if ((cur == NULL) || (len == NULL))
858 return(0);
859
860 /* cur is zero-terminated, so we can lie about its length. */
861 *len = 4;
862 c = xmlGetUTF8Char(cur, len);
863
864 return((c < 0) ? 0 : c);
865 }
866
867 /**
868 * xmlCopyCharMultiByte:
869 * @out: pointer to an array of xmlChar
870 * @val: the char value
871 *
872 * append the char value in the array
873 *
874 * Returns the number of xmlChar written
875 */
876 int
xmlCopyCharMultiByte(xmlChar * out,int val)877 xmlCopyCharMultiByte(xmlChar *out, int val) {
878 if ((out == NULL) || (val < 0)) return(0);
879 /*
880 * We are supposed to handle UTF8, check it's valid
881 * From rfc2044: encoding of the Unicode values on UTF-8:
882 *
883 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
884 * 0000 0000-0000 007F 0xxxxxxx
885 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
886 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
887 */
888 if (val >= 0x80) {
889 xmlChar *savedout = out;
890 int bits;
891 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
892 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
893 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
894 else {
895 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
896 fprintf(stderr, "xmlCopyCharMultiByte: codepoint out of range\n");
897 abort();
898 #endif
899 return(0);
900 }
901 for ( ; bits >= 0; bits-= 6)
902 *out++= ((val >> bits) & 0x3F) | 0x80 ;
903 return (out - savedout);
904 }
905 *out = val;
906 return 1;
907 }
908
909 /**
910 * xmlCopyChar:
911 * @len: Ignored, compatibility
912 * @out: pointer to an array of xmlChar
913 * @val: the char value
914 *
915 * append the char value in the array
916 *
917 * Returns the number of xmlChar written
918 */
919
920 int
xmlCopyChar(int len ATTRIBUTE_UNUSED,xmlChar * out,int val)921 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
922 if ((out == NULL) || (val < 0)) return(0);
923 /* the len parameter is ignored */
924 if (val >= 0x80) {
925 return(xmlCopyCharMultiByte (out, val));
926 }
927 *out = val;
928 return 1;
929 }
930
931 /************************************************************************
932 * *
933 * Commodity functions to switch encodings *
934 * *
935 ************************************************************************/
936
937 static int
xmlDetectEBCDIC(xmlParserInputPtr input,xmlCharEncodingHandlerPtr * hout)938 xmlDetectEBCDIC(xmlParserInputPtr input, xmlCharEncodingHandlerPtr *hout) {
939 xmlChar out[200];
940 xmlCharEncodingHandlerPtr handler;
941 int inlen, outlen, res, i;
942
943 *hout = NULL;
944
945 /*
946 * To detect the EBCDIC code page, we convert the first 200 bytes
947 * to EBCDIC-US and try to find the encoding declaration.
948 */
949 res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
950 if (res != 0)
951 return(res);
952 outlen = sizeof(out) - 1;
953 inlen = input->end - input->cur;
954 res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
955 /*
956 * Return the EBCDIC handler if decoding failed. The error will
957 * be reported later.
958 */
959 if (res < 0)
960 goto done;
961 out[outlen] = 0;
962
963 for (i = 0; i < outlen; i++) {
964 if (out[i] == '>')
965 break;
966 if ((out[i] == 'e') &&
967 (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
968 int start, cur, quote;
969
970 i += 8;
971 while (IS_BLANK_CH(out[i]))
972 i += 1;
973 if (out[i++] != '=')
974 break;
975 while (IS_BLANK_CH(out[i]))
976 i += 1;
977 quote = out[i++];
978 if ((quote != '\'') && (quote != '"'))
979 break;
980 start = i;
981 cur = out[i];
982 while (((cur >= 'a') && (cur <= 'z')) ||
983 ((cur >= 'A') && (cur <= 'Z')) ||
984 ((cur >= '0') && (cur <= '9')) ||
985 (cur == '.') || (cur == '_') ||
986 (cur == '-'))
987 cur = out[++i];
988 if (cur != quote)
989 break;
990 out[i] = 0;
991 xmlCharEncCloseFunc(handler);
992 res = xmlOpenCharEncodingHandler((char *) out + start,
993 /* output */ 0, &handler);
994 if (res != 0)
995 return(res);
996 *hout = handler;
997 return(0);
998 }
999 }
1000
1001 done:
1002 /*
1003 * Encoding handlers are stateful, so we have to recreate them.
1004 */
1005 xmlCharEncCloseFunc(handler);
1006 res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
1007 if (res != 0)
1008 return(res);
1009 *hout = handler;
1010 return(0);
1011 }
1012
1013 /**
1014 * xmlSwitchEncoding:
1015 * @ctxt: the parser context
1016 * @enc: the encoding value (number)
1017 *
1018 * Use encoding specified by enum to decode input data. This overrides
1019 * the encoding found in the XML declaration.
1020 *
1021 * This function can also be used to override the encoding of chunks
1022 * passed to xmlParseChunk.
1023 *
1024 * Returns 0 in case of success, -1 otherwise
1025 */
1026 int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt,xmlCharEncoding enc)1027 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1028 {
1029 xmlCharEncodingHandlerPtr handler = NULL;
1030 int ret;
1031 int res;
1032
1033 if ((ctxt == NULL) || (ctxt->input == NULL))
1034 return(-1);
1035
1036 switch (enc) {
1037 case XML_CHAR_ENCODING_NONE:
1038 case XML_CHAR_ENCODING_UTF8:
1039 case XML_CHAR_ENCODING_ASCII:
1040 res = 0;
1041 break;
1042 case XML_CHAR_ENCODING_EBCDIC:
1043 res = xmlDetectEBCDIC(ctxt->input, &handler);
1044 break;
1045 default:
1046 res = xmlLookupCharEncodingHandler(enc, &handler);
1047 break;
1048 }
1049
1050 if (res != 0) {
1051 const char *name = xmlGetCharEncodingName(enc);
1052
1053 xmlFatalErr(ctxt, res, (name ? name : "<null>"));
1054 return(-1);
1055 }
1056
1057 ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1058
1059 if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1060 ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1061 }
1062
1063 return(ret);
1064 }
1065
1066 /**
1067 * xmlSwitchEncodingName:
1068 * @ctxt: the parser context, only for error reporting
1069 * @input: the input strea,
1070 * @encoding: the encoding name
1071 *
1072 * Returns 0 in case of success, -1 otherwise
1073 */
1074 static int
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,const char * encoding)1075 xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1076 const char *encoding) {
1077 xmlCharEncodingHandlerPtr handler;
1078 int res;
1079
1080 if (encoding == NULL)
1081 return(-1);
1082
1083 res = xmlOpenCharEncodingHandler(encoding, /* output */ 0, &handler);
1084 if (res != 0) {
1085 xmlFatalErr(ctxt, res, encoding);
1086 return(-1);
1087 }
1088
1089 return(xmlSwitchInputEncoding(ctxt, input, handler));
1090 }
1091
1092 /**
1093 * xmlSwitchEncodingName:
1094 * @ctxt: the parser context
1095 * @encoding: the encoding name
1096 *
1097 * Use specified encoding to decode input data. This overrides the
1098 * encoding found in the XML declaration.
1099 *
1100 * This function can also be used to override the encoding of chunks
1101 * passed to xmlParseChunk.
1102 *
1103 * Available since 2.13.0.
1104 *
1105 * Returns 0 in case of success, -1 otherwise
1106 */
1107 int
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt,const char * encoding)1108 xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
1109 return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1110 }
1111
1112 /**
1113 * xmlSwitchInputEncoding:
1114 * @ctxt: the parser context, only for error reporting
1115 * @input: the input stream
1116 * @handler: the encoding handler
1117 *
1118 * DEPRECATED: Internal function, don't use.
1119 *
1120 * Use encoding handler to decode input data.
1121 *
1122 * Returns 0 in case of success, -1 otherwise
1123 */
1124 int
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,xmlCharEncodingHandlerPtr handler)1125 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1126 xmlCharEncodingHandlerPtr handler)
1127 {
1128 int nbchars;
1129 xmlParserInputBufferPtr in;
1130
1131 if ((input == NULL) || (input->buf == NULL)) {
1132 xmlCharEncCloseFunc(handler);
1133 return (-1);
1134 }
1135 in = input->buf;
1136
1137 input->flags |= XML_INPUT_HAS_ENCODING;
1138
1139 /*
1140 * UTF-8 requires no encoding handler.
1141 */
1142 if ((handler != NULL) &&
1143 (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1144 xmlCharEncCloseFunc(handler);
1145 handler = NULL;
1146 }
1147
1148 if (in->encoder == handler)
1149 return (0);
1150
1151 if (in->encoder != NULL) {
1152 /*
1153 * Switching encodings during parsing is a really bad idea,
1154 * but Chromium can switch between ISO-8859-1 and UTF-16 before
1155 * separate calls to xmlParseChunk.
1156 *
1157 * TODO: We should check whether the "raw" input buffer is empty and
1158 * convert the old content using the old encoder.
1159 */
1160
1161 xmlCharEncCloseFunc(in->encoder);
1162 in->encoder = handler;
1163 return (0);
1164 }
1165
1166 in->encoder = handler;
1167
1168 /*
1169 * Is there already some content down the pipe to convert ?
1170 */
1171 if (xmlBufIsEmpty(in->buffer) == 0) {
1172 xmlBufPtr buf;
1173 size_t processed;
1174
1175 buf = xmlBufCreate();
1176 if (buf == NULL) {
1177 xmlCtxtErrMemory(ctxt);
1178 return(-1);
1179 }
1180
1181 /*
1182 * Shrink the current input buffer.
1183 * Move it as the raw buffer and create a new input buffer
1184 */
1185 processed = input->cur - input->base;
1186 xmlBufShrink(in->buffer, processed);
1187 input->consumed += processed;
1188 in->raw = in->buffer;
1189 in->buffer = buf;
1190 in->rawconsumed = processed;
1191
1192 nbchars = xmlCharEncInput(in);
1193 xmlBufResetInput(in->buffer, input);
1194 if (nbchars == XML_ENC_ERR_MEMORY) {
1195 xmlCtxtErrMemory(ctxt);
1196 } else if (nbchars < 0) {
1197 xmlCtxtErrIO(ctxt, in->error, NULL);
1198 xmlHaltParser(ctxt);
1199 return (-1);
1200 }
1201 }
1202 return (0);
1203 }
1204
1205 /**
1206 * xmlSwitchToEncoding:
1207 * @ctxt: the parser context
1208 * @handler: the encoding handler
1209 *
1210 * Use encoding handler to decode input data.
1211 *
1212 * This function can be used to enforce the encoding of chunks passed
1213 * to xmlParseChunk.
1214 *
1215 * Returns 0 in case of success, -1 otherwise
1216 */
1217 int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt,xmlCharEncodingHandlerPtr handler)1218 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1219 {
1220 if (ctxt == NULL)
1221 return(-1);
1222 return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1223 }
1224
1225 /**
1226 * xmlDetectEncoding:
1227 * @ctxt: the parser context
1228 *
1229 * Handle optional BOM, detect and switch to encoding.
1230 *
1231 * Assumes that there are at least four bytes in the input buffer.
1232 */
1233 void
xmlDetectEncoding(xmlParserCtxtPtr ctxt)1234 xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1235 const xmlChar *in;
1236 xmlCharEncoding enc;
1237 int bomSize;
1238 int autoFlag = 0;
1239
1240 if (xmlParserGrow(ctxt) < 0)
1241 return;
1242 in = ctxt->input->cur;
1243 if (ctxt->input->end - in < 4)
1244 return;
1245
1246 if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1247 /*
1248 * If the encoding was already set, only skip the BOM which was
1249 * possibly decoded to UTF-8.
1250 */
1251 if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1252 ctxt->input->cur += 3;
1253 }
1254
1255 return;
1256 }
1257
1258 enc = XML_CHAR_ENCODING_NONE;
1259 bomSize = 0;
1260
1261 switch (in[0]) {
1262 case 0x00:
1263 if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1264 enc = XML_CHAR_ENCODING_UCS4BE;
1265 autoFlag = XML_INPUT_AUTO_OTHER;
1266 } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1267 enc = XML_CHAR_ENCODING_UTF16BE;
1268 autoFlag = XML_INPUT_AUTO_UTF16BE;
1269 }
1270 break;
1271
1272 case 0x3C:
1273 if (in[1] == 0x00) {
1274 if ((in[2] == 0x00) && (in[3] == 0x00)) {
1275 enc = XML_CHAR_ENCODING_UCS4LE;
1276 autoFlag = XML_INPUT_AUTO_OTHER;
1277 } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1278 enc = XML_CHAR_ENCODING_UTF16LE;
1279 autoFlag = XML_INPUT_AUTO_UTF16LE;
1280 }
1281 }
1282 break;
1283
1284 case 0x4C:
1285 if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1286 enc = XML_CHAR_ENCODING_EBCDIC;
1287 autoFlag = XML_INPUT_AUTO_OTHER;
1288 }
1289 break;
1290
1291 case 0xEF:
1292 if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1293 enc = XML_CHAR_ENCODING_UTF8;
1294 autoFlag = XML_INPUT_AUTO_UTF8;
1295 bomSize = 3;
1296 }
1297 break;
1298
1299 case 0xFE:
1300 if (in[1] == 0xFF) {
1301 enc = XML_CHAR_ENCODING_UTF16BE;
1302 autoFlag = XML_INPUT_AUTO_UTF16BE;
1303 bomSize = 2;
1304 }
1305 break;
1306
1307 case 0xFF:
1308 if (in[1] == 0xFE) {
1309 enc = XML_CHAR_ENCODING_UTF16LE;
1310 autoFlag = XML_INPUT_AUTO_UTF16LE;
1311 bomSize = 2;
1312 }
1313 break;
1314 }
1315
1316 if (bomSize > 0) {
1317 ctxt->input->cur += bomSize;
1318 }
1319
1320 if (enc != XML_CHAR_ENCODING_NONE) {
1321 ctxt->input->flags |= autoFlag;
1322 xmlSwitchEncoding(ctxt, enc);
1323 }
1324 }
1325
1326 /**
1327 * xmlSetDeclaredEncoding:
1328 * @ctxt: the parser context
1329 * @encoding: declared encoding
1330 *
1331 * Set the encoding from a declaration in the document.
1332 *
1333 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1334 * about encoding mismatches.
1335 *
1336 * Takes ownership of 'encoding'.
1337 */
1338 void
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt,xmlChar * encoding)1339 xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1340 if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1341 ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1342 xmlSwitchEncodingName(ctxt, (const char *) encoding);
1343 ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1344 } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1345 static const char *allowedUTF8[] = {
1346 "UTF-8", "UTF8", NULL
1347 };
1348 static const char *allowedUTF16LE[] = {
1349 "UTF-16", "UTF-16LE", "UTF16", NULL
1350 };
1351 static const char *allowedUTF16BE[] = {
1352 "UTF-16", "UTF-16BE", "UTF16", NULL
1353 };
1354 const char **allowed = NULL;
1355 const char *autoEnc = NULL;
1356
1357 switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1358 case XML_INPUT_AUTO_UTF8:
1359 allowed = allowedUTF8;
1360 autoEnc = "UTF-8";
1361 break;
1362 case XML_INPUT_AUTO_UTF16LE:
1363 allowed = allowedUTF16LE;
1364 autoEnc = "UTF-16LE";
1365 break;
1366 case XML_INPUT_AUTO_UTF16BE:
1367 allowed = allowedUTF16BE;
1368 autoEnc = "UTF-16BE";
1369 break;
1370 }
1371
1372 if (allowed != NULL) {
1373 const char **p;
1374 int match = 0;
1375
1376 for (p = allowed; *p != NULL; p++) {
1377 if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1378 match = 1;
1379 break;
1380 }
1381 }
1382
1383 if (match == 0) {
1384 xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1385 "Encoding '%s' doesn't match "
1386 "auto-detected '%s'\n",
1387 encoding, BAD_CAST autoEnc);
1388 xmlFree(encoding);
1389 encoding = xmlStrdup(BAD_CAST autoEnc);
1390 if (encoding == NULL)
1391 xmlCtxtErrMemory(ctxt);
1392 }
1393 }
1394 }
1395
1396 if (ctxt->encoding != NULL)
1397 xmlFree((xmlChar *) ctxt->encoding);
1398 ctxt->encoding = encoding;
1399 }
1400
1401 /************************************************************************
1402 * *
1403 * Commodity functions to handle entities processing *
1404 * *
1405 ************************************************************************/
1406
1407 /**
1408 * xmlFreeInputStream:
1409 * @input: an xmlParserInputPtr
1410 *
1411 * Free up an input stream.
1412 */
1413 void
xmlFreeInputStream(xmlParserInputPtr input)1414 xmlFreeInputStream(xmlParserInputPtr input) {
1415 if (input == NULL) return;
1416
1417 if (input->filename != NULL) xmlFree((char *) input->filename);
1418 if (input->version != NULL) xmlFree((char *) input->version);
1419 if ((input->free != NULL) && (input->base != NULL))
1420 input->free((xmlChar *) input->base);
1421 if (input->buf != NULL)
1422 xmlFreeParserInputBuffer(input->buf);
1423 xmlFree(input);
1424 }
1425
1426 /**
1427 * xmlNewInputStream:
1428 * @ctxt: an XML parser context
1429 *
1430 * Create a new input stream structure.
1431 *
1432 * Returns the new input stream or NULL
1433 */
1434 xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt)1435 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1436 xmlParserInputPtr input;
1437
1438 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1439 if (input == NULL) {
1440 xmlCtxtErrMemory(ctxt);
1441 return(NULL);
1442 }
1443 memset(input, 0, sizeof(xmlParserInput));
1444 input->line = 1;
1445 input->col = 1;
1446
1447 /*
1448 * If the context is NULL the id cannot be initialized, but that
1449 * should not happen while parsing which is the situation where
1450 * the id is actually needed.
1451 */
1452 if (ctxt != NULL) {
1453 if (input->id >= INT_MAX) {
1454 xmlCtxtErrMemory(ctxt);
1455 return(NULL);
1456 }
1457 input->id = ctxt->input_id++;
1458 }
1459
1460 return(input);
1461 }
1462
1463 /**
1464 * xmlNewInputURL:
1465 * @ctxt: parser context
1466 * @url: filename or URL
1467 * @publicId: publid ID from doctype (optional)
1468 * @encoding: character encoding (optional)
1469 * @flags: unused, pass 0
1470 *
1471 * Creates a new parser input from the filesystem, the network or
1472 * a user-defined resource loader.
1473 *
1474 * @url is a filename or URL. If if contains the substring "://",
1475 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
1476 * treated as a filesystem path.
1477 *
1478 * @publicId is an optional XML public ID, typically from a doctype
1479 * declaration. It is used for catalog lookups.
1480 *
1481 * If @encoding is specified, it will override any encodings found
1482 * in XML declarations, text declarations, BOMs, etc. Pass NULL
1483 * for auto-detection.
1484 *
1485 * The following resource loaders will be called if they were
1486 * registered (in order of precedence):
1487 *
1488 * - the global external entity loader set with
1489 * xmlSetExternalEntityLoader
1490 * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
1491 * xmlParserInputBufferCreateFilenameDefault
1492 * - the default loader which will return
1493 * - the result from a matching global input callback set with
1494 * xmlRegisterInputCallbacks
1495 * - a HTTP resource if support is compiled in.
1496 * - a file opened from the filesystem, with automatic detection
1497 * of compressed files if support is compiled in.
1498 *
1499 * The returned input can be passed to xmlCtxtParseDocument or
1500 * htmlCtxtParseDocument.
1501 *
1502 * This function should not be invoked from user-defined resource
1503 * loaders to avoid infinite loops.
1504 *
1505 * Returns a new parser input.
1506 */
1507 xmlParserInputPtr
xmlNewInputURL(xmlParserCtxtPtr ctxt,const char * url,const char * publicId,const char * encoding,int flags ATTRIBUTE_UNUSED)1508 xmlNewInputURL(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
1509 const char *encoding, int flags ATTRIBUTE_UNUSED) {
1510 xmlParserInputPtr input;
1511
1512 if ((ctxt == NULL) || (url == NULL))
1513 return(NULL);
1514
1515 input = xmlLoadExternalEntity(url, publicId, ctxt);
1516 if (input == NULL)
1517 return(NULL);
1518
1519 if (encoding != NULL)
1520 xmlSwitchInputEncodingName(ctxt, input, encoding);
1521
1522 return(input);
1523 }
1524
1525 /**
1526 * xmlNewInputInternal:
1527 * @ctxt: parser context
1528 * @buf: parser input buffer
1529 * @filename: filename or URL
1530 * @encoding: character encoding (optional)
1531 *
1532 * Internal helper function.
1533 *
1534 * Returns a new parser input.
1535 */
1536 static xmlParserInputPtr
xmlNewInputInternal(xmlParserCtxtPtr ctxt,xmlParserInputBufferPtr buf,const char * filename,const char * encoding)1537 xmlNewInputInternal(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
1538 const char *filename, const char *encoding) {
1539 xmlParserInputPtr input;
1540
1541 input = xmlNewInputStream(ctxt);
1542 if (input == NULL) {
1543 xmlFreeParserInputBuffer(buf);
1544 return(NULL);
1545 }
1546
1547 input->buf = buf;
1548 xmlBufResetInput(input->buf->buffer, input);
1549
1550 if (filename != NULL) {
1551 input->filename = xmlMemStrdup(filename);
1552 if (input->filename == NULL) {
1553 xmlCtxtErrMemory(ctxt);
1554 xmlFreeInputStream(input);
1555 return(NULL);
1556 }
1557 }
1558
1559 if (encoding != NULL)
1560 xmlSwitchInputEncodingName(ctxt, input, encoding);
1561
1562 return(input);
1563 }
1564
1565 /**
1566 * xmlNewInputMemory:
1567 * @ctxt: parser context
1568 * @url: base URL (optional)
1569 * @mem: pointer to char array
1570 * @size: size of array
1571 * @encoding: character encoding (optional)
1572 * @flags: optimization hints
1573 *
1574 * Creates a new parser input to read from a memory area.
1575 *
1576 * @url is used as base to resolve external entities and for
1577 * error reporting.
1578 *
1579 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1580 * stay unchanged until parsing has finished. This can avoid
1581 * temporary copies.
1582 *
1583 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1584 * area must contain a zero byte after the buffer at position @size.
1585 * This can avoid temporary copies.
1586 *
1587 * Returns a new parser input.
1588 */
1589 xmlParserInputPtr
xmlNewInputMemory(xmlParserCtxtPtr ctxt,const char * url,const void * mem,size_t size,const char * encoding,int flags)1590 xmlNewInputMemory(xmlParserCtxtPtr ctxt, const char *url,
1591 const void *mem, size_t size,
1592 const char *encoding, int flags) {
1593 xmlParserInputBufferPtr buf;
1594
1595 if ((ctxt == NULL) || (mem == NULL))
1596 return(NULL);
1597
1598 buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1599 if (buf == NULL) {
1600 xmlCtxtErrMemory(ctxt);
1601 return(NULL);
1602 }
1603
1604 return(xmlNewInputInternal(ctxt, buf, url, encoding));
1605 }
1606
1607 /**
1608 * xmlNewInputString:
1609 * @ctxt: parser context
1610 * @url: base URL (optional)
1611 * @str: zero-terminated string
1612 * @encoding: character encoding (optional)
1613 * @flags: optimization hints
1614 *
1615 * Creates a new parser input to read from a zero-terminated string.
1616 *
1617 * @url is used as base to resolve external entities and for
1618 * error reporting.
1619 *
1620 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1621 * stay unchanged until parsing has finished. This can avoid
1622 * temporary copies.
1623 *
1624 * Returns a new parser input.
1625 */
1626 xmlParserInputPtr
xmlNewInputString(xmlParserCtxtPtr ctxt,const char * url,const char * str,const char * encoding,int flags)1627 xmlNewInputString(xmlParserCtxtPtr ctxt, const char *url,
1628 const char *str, const char *encoding, int flags) {
1629 xmlParserInputBufferPtr buf;
1630
1631 if ((ctxt == NULL) || (str == NULL))
1632 return(NULL);
1633
1634 buf = xmlNewInputBufferString(str, flags);
1635 if (buf == NULL) {
1636 xmlCtxtErrMemory(ctxt);
1637 return(NULL);
1638 }
1639
1640 return(xmlNewInputInternal(ctxt, buf, url, encoding));
1641 }
1642
1643 /**
1644 * xmlNewInputFd:
1645 * @ctxt: parser context
1646 * @url: base URL (optional)
1647 * @fd: file descriptor
1648 * @encoding: character encoding (optional)
1649 * @flags: unused, pass 0
1650 *
1651 * Creates a new parser input to read from a zero-terminated string.
1652 *
1653 * @url is used as base to resolve external entities and for
1654 * error reporting.
1655 *
1656 * @fd is closed after parsing has finished.
1657 *
1658 * Returns a new parser input.
1659 */
1660 xmlParserInputPtr
xmlNewInputFd(xmlParserCtxtPtr ctxt,const char * url,int fd,const char * encoding,int flags ATTRIBUTE_UNUSED)1661 xmlNewInputFd(xmlParserCtxtPtr ctxt, const char *url,
1662 int fd, const char *encoding, int flags ATTRIBUTE_UNUSED) {
1663 xmlParserInputBufferPtr buf;
1664
1665 if ((ctxt == NULL) || (fd < 0))
1666 return(NULL);
1667
1668 buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
1669 if (buf == NULL) {
1670 xmlCtxtErrMemory(ctxt);
1671 return(NULL);
1672 }
1673
1674 return(xmlNewInputInternal(ctxt, buf, url, encoding));
1675 }
1676
1677 /**
1678 * xmlNewInputIO:
1679 * @ctxt: parser context
1680 * @url: base URL (optional)
1681 * @ioRead: read callback
1682 * @ioClose: close callback (optional)
1683 * @ioCtxt: IO context
1684 * @encoding: character encoding (optional)
1685 * @flags: unused, pass 0
1686 *
1687 * Creates a new parser input to read from input callbacks and
1688 * cintext.
1689 *
1690 * @url is used as base to resolve external entities and for
1691 * error reporting.
1692 *
1693 * @ioRead is called to read new data into a provided buffer.
1694 * It must return the number of bytes written into the buffer
1695 * ot a negative xmlParserErrors code on failure.
1696 *
1697 * @ioClose is called after parsing has finished.
1698 *
1699 * @ioCtxt is an opaque pointer passed to the callbacks.
1700 *
1701 * Returns a new parser input.
1702 */
1703 xmlParserInputPtr
xmlNewInputIO(xmlParserCtxtPtr ctxt,const char * url,xmlInputReadCallback ioRead,xmlInputCloseCallback ioClose,void * ioCtxt,const char * encoding,int flags ATTRIBUTE_UNUSED)1704 xmlNewInputIO(xmlParserCtxtPtr ctxt, const char *url,
1705 xmlInputReadCallback ioRead, xmlInputCloseCallback ioClose,
1706 void *ioCtxt,
1707 const char *encoding, int flags ATTRIBUTE_UNUSED) {
1708 xmlParserInputBufferPtr buf;
1709
1710 if ((ctxt == NULL) || (ioRead == NULL))
1711 return(NULL);
1712
1713 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
1714 if (buf == NULL) {
1715 xmlCtxtErrMemory(ctxt);
1716 if (ioClose != NULL)
1717 ioClose(ioCtxt);
1718 return(NULL);
1719 }
1720
1721 buf->context = ioCtxt;
1722 buf->readcallback = ioRead;
1723 buf->closecallback = ioClose;
1724
1725 return(xmlNewInputInternal(ctxt, buf, url, encoding));
1726 }
1727
1728 /**
1729 * xmlNewInputPush:
1730 * @ctxt: parser context
1731 * @url: base URL (optional)
1732 * @chunk: pointer to char array
1733 * @size: size of array
1734 * @encoding: character encoding (optional)
1735 *
1736 * Creates a new parser input for a push parser.
1737 *
1738 * Returns a new parser input.
1739 */
1740 xmlParserInputPtr
xmlNewInputPush(xmlParserCtxtPtr ctxt,const char * url,const char * chunk,int size,const char * encoding)1741 xmlNewInputPush(xmlParserCtxtPtr ctxt, const char *url,
1742 const char *chunk, int size, const char *encoding) {
1743 xmlParserInputBufferPtr buf;
1744 xmlParserInputPtr input;
1745
1746 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
1747 if (buf == NULL) {
1748 xmlCtxtErrMemory(ctxt);
1749 return(NULL);
1750 }
1751
1752 input = xmlNewInputInternal(ctxt, buf, url, encoding);
1753 if (input == NULL)
1754 return(NULL);
1755
1756 input->flags |= XML_INPUT_PROGRESSIVE;
1757
1758 if ((size > 0) && (chunk != NULL)) {
1759 int res;
1760
1761 res = xmlParserInputBufferPush(input->buf, size, chunk);
1762 xmlBufResetInput(input->buf->buffer, input);
1763 if (res < 0) {
1764 xmlCtxtErrIO(ctxt, input->buf->error, NULL);
1765 xmlFreeInputStream(input);
1766 return(NULL);
1767 }
1768 }
1769
1770 return(input);
1771 }
1772
1773 /**
1774 * xmlNewIOInputStream:
1775 * @ctxt: an XML parser context
1776 * @input: an I/O Input
1777 * @enc: the charset encoding if known
1778 *
1779 * DEPRECATED: Use xmlNewInputURL, xmlNewInputMemory, etc.
1780 *
1781 * Create a new input stream structure encapsulating the @input into
1782 * a stream suitable for the parser.
1783 *
1784 * Returns the new input stream or NULL
1785 */
1786 xmlParserInputPtr
xmlNewIOInputStream(xmlParserCtxtPtr ctxt,xmlParserInputBufferPtr buf,xmlCharEncoding enc)1787 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
1788 xmlCharEncoding enc) {
1789 const char *encoding;
1790
1791 if (buf == NULL)
1792 return(NULL);
1793
1794 encoding = xmlGetCharEncodingName(enc);
1795 return(xmlNewInputInternal(ctxt, buf, NULL, encoding));
1796 }
1797
1798 /**
1799 * xmlNewEntityInputStream:
1800 * @ctxt: an XML parser context
1801 * @entity: an Entity pointer
1802 *
1803 * DEPRECATED: Internal function, do not use.
1804 *
1805 * Create a new input stream based on an xmlEntityPtr
1806 *
1807 * Returns the new input stream or NULL
1808 */
1809 xmlParserInputPtr
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)1810 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
1811 xmlParserInputPtr input;
1812
1813 if ((ctxt == NULL) || (ent == NULL))
1814 return(NULL);
1815
1816 if (ent->content != NULL) {
1817 input = xmlNewInputString(ctxt, NULL, (const char *) ent->content,
1818 NULL, XML_INPUT_BUF_STATIC);
1819 } else if (ent->URI != NULL) {
1820 input = xmlLoadExternalEntity((char *) ent->URI,
1821 (char *) ent->ExternalID, ctxt);
1822 } else {
1823 input = xmlNewInputMemory(ctxt, NULL, "", 0, NULL,
1824 XML_INPUT_BUF_STATIC |
1825 XML_INPUT_BUF_ZERO_TERMINATED);
1826 }
1827
1828 if (input == NULL)
1829 return(NULL);
1830
1831 input->entity = ent;
1832
1833 return(input);
1834 }
1835
1836 /**
1837 * xmlNewStringInputStream:
1838 * @ctxt: an XML parser context
1839 * @buffer: an memory buffer
1840 *
1841 * DEPRECATED: Use xmlNewInputString.
1842 *
1843 * Create a new input stream based on a memory buffer.
1844 *
1845 * Returns the new input stream
1846 */
1847 xmlParserInputPtr
xmlNewStringInputStream(xmlParserCtxtPtr ctxt,const xmlChar * buffer)1848 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1849 return(xmlNewInputString(ctxt, NULL, (const char *) buffer, NULL, 0));
1850 }
1851
1852
1853 /****************************************************************
1854 * *
1855 * External entities loading *
1856 * *
1857 ****************************************************************/
1858
1859 #ifdef LIBXML_CATALOG_ENABLED
1860
1861 /**
1862 * xmlResolveResourceFromCatalog:
1863 * @URL: the URL for the entity to load
1864 * @ID: the System ID for the entity to load
1865 * @ctxt: the context in which the entity is called or NULL
1866 *
1867 * Resolves the URL and ID against the appropriate catalog.
1868 * This function is used by xmlDefaultExternalEntityLoader and
1869 * xmlNoNetExternalEntityLoader.
1870 *
1871 * Returns a new allocated URL, or NULL.
1872 */
1873 static xmlChar *
xmlResolveResourceFromCatalog(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)1874 xmlResolveResourceFromCatalog(const char *URL, const char *ID,
1875 xmlParserCtxtPtr ctxt) {
1876 xmlChar *resource = NULL;
1877 xmlCatalogAllow pref;
1878
1879 /*
1880 * If the resource doesn't exists as a file,
1881 * try to load it from the resource pointed in the catalogs
1882 */
1883 pref = xmlCatalogGetDefaults();
1884
1885 if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
1886 /*
1887 * Do a local lookup
1888 */
1889 if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
1890 ((pref == XML_CATA_ALLOW_ALL) ||
1891 (pref == XML_CATA_ALLOW_DOCUMENT))) {
1892 resource = xmlCatalogLocalResolve(ctxt->catalogs,
1893 (const xmlChar *)ID,
1894 (const xmlChar *)URL);
1895 }
1896 /*
1897 * Try a global lookup
1898 */
1899 if ((resource == NULL) &&
1900 ((pref == XML_CATA_ALLOW_ALL) ||
1901 (pref == XML_CATA_ALLOW_GLOBAL))) {
1902 resource = xmlCatalogResolve((const xmlChar *)ID,
1903 (const xmlChar *)URL);
1904 }
1905 if ((resource == NULL) && (URL != NULL))
1906 resource = xmlStrdup((const xmlChar *) URL);
1907
1908 /*
1909 * TODO: do an URI lookup on the reference
1910 */
1911 if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) {
1912 xmlChar *tmp = NULL;
1913
1914 if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
1915 ((pref == XML_CATA_ALLOW_ALL) ||
1916 (pref == XML_CATA_ALLOW_DOCUMENT))) {
1917 tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
1918 }
1919 if ((tmp == NULL) &&
1920 ((pref == XML_CATA_ALLOW_ALL) ||
1921 (pref == XML_CATA_ALLOW_GLOBAL))) {
1922 tmp = xmlCatalogResolveURI(resource);
1923 }
1924
1925 if (tmp != NULL) {
1926 xmlFree(resource);
1927 resource = tmp;
1928 }
1929 }
1930 }
1931
1932 return resource;
1933 }
1934
1935 #endif
1936
1937 /**
1938 * xmlCheckHTTPInput:
1939 * @ctxt: an XML parser context
1940 * @ret: an XML parser input
1941 *
1942 * DEPRECATED: Internal function, don't use.
1943 *
1944 * Check an input in case it was created from an HTTP stream, in that
1945 * case it will handle encoding and update of the base URL in case of
1946 * redirection. It also checks for HTTP errors in which case the input
1947 * is cleanly freed up and an appropriate error is raised in context
1948 *
1949 * Returns the input or NULL in case of HTTP error.
1950 */
1951 xmlParserInputPtr
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr ret)1952 xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
1953 /* Avoid unused variable warning if features are disabled. */
1954 (void) ctxt;
1955
1956 #ifdef LIBXML_HTTP_ENABLED
1957 if ((ret != NULL) && (ret->buf != NULL) &&
1958 (ret->buf->readcallback == xmlIOHTTPRead) &&
1959 (ret->buf->context != NULL)) {
1960 const char *encoding;
1961 const char *redir;
1962 const char *mime;
1963 int code;
1964
1965 code = xmlNanoHTTPReturnCode(ret->buf->context);
1966 if (code >= 400) {
1967 /* fatal error */
1968 if (ret->filename != NULL)
1969 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
1970 else
1971 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
1972 xmlFreeInputStream(ret);
1973 ret = NULL;
1974 } else {
1975
1976 mime = xmlNanoHTTPMimeType(ret->buf->context);
1977 if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
1978 (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
1979 encoding = xmlNanoHTTPEncoding(ret->buf->context);
1980 if (encoding != NULL)
1981 xmlSwitchEncodingName(ctxt, encoding);
1982 #if 0
1983 } else if (xmlStrstr(BAD_CAST mime, BAD_CAST "html")) {
1984 #endif
1985 }
1986 redir = xmlNanoHTTPRedir(ret->buf->context);
1987 if (redir != NULL) {
1988 if (ret->filename != NULL)
1989 xmlFree((xmlChar *) ret->filename);
1990 ret->filename =
1991 (char *) xmlStrdup((const xmlChar *) redir);
1992 }
1993 }
1994 }
1995 #endif
1996 return(ret);
1997 }
1998
1999 /**
2000 * xmlNewInputFromFile:
2001 * @ctxt: an XML parser context
2002 * @filename: the filename to use as entity
2003 *
2004 * DEPRECATED: Use xmlNewInputURL.
2005 *
2006 * Create a new input stream based on a file or an URL.
2007 *
2008 * Returns the new input stream or NULL in case of error
2009 */
2010 xmlParserInputPtr
xmlNewInputFromFile(xmlParserCtxtPtr ctxt,const char * filename)2011 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2012 xmlParserInputBufferPtr buf;
2013 xmlParserInputPtr inputStream;
2014 xmlChar *URI = NULL;
2015 int code;
2016
2017 if ((ctxt == NULL) || (filename == NULL))
2018 return(NULL);
2019
2020 code = xmlParserInputBufferCreateFilenameSafe(filename,
2021 XML_CHAR_ENCODING_NONE, &buf);
2022 if (buf == NULL) {
2023 xmlCtxtErrIO(ctxt, code, filename);
2024 return(NULL);
2025 }
2026
2027 inputStream = xmlNewInputStream(ctxt);
2028 if (inputStream == NULL) {
2029 xmlFreeParserInputBuffer(buf);
2030 return(NULL);
2031 }
2032
2033 inputStream->buf = buf;
2034 inputStream = xmlCheckHTTPInput(ctxt, inputStream);
2035 if (inputStream == NULL)
2036 return(NULL);
2037
2038 if (inputStream->filename == NULL)
2039 URI = xmlStrdup((xmlChar *) filename);
2040 else
2041 URI = xmlStrdup((xmlChar *) inputStream->filename);
2042 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
2043 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
2044 if (URI != NULL) xmlFree((char *) URI);
2045
2046 xmlBufResetInput(inputStream->buf->buffer, inputStream);
2047
2048 return(inputStream);
2049 }
2050
2051 /**
2052 * xmlDefaultExternalEntityLoader:
2053 * @URL: the URL for the entity to load
2054 * @ID: the System ID for the entity to load
2055 * @ctxt: the context in which the entity is called or NULL
2056 *
2057 * By default we don't load external entities, yet.
2058 *
2059 * Returns a new allocated xmlParserInputPtr, or NULL.
2060 */
2061 static xmlParserInputPtr
xmlDefaultExternalEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2062 xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
2063 xmlParserCtxtPtr ctxt)
2064 {
2065 xmlParserInputPtr ret = NULL;
2066 xmlChar *resource = NULL;
2067
2068 if (URL == NULL)
2069 return(NULL);
2070
2071 if ((ctxt != NULL) && (ctxt->options & XML_PARSE_NONET)) {
2072 int options = ctxt->options;
2073
2074 ctxt->options -= XML_PARSE_NONET;
2075 ret = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
2076 ctxt->options = options;
2077 return(ret);
2078 }
2079 #ifdef LIBXML_CATALOG_ENABLED
2080 resource = xmlResolveResourceFromCatalog(URL, ID, ctxt);
2081 #endif
2082
2083 if (resource == NULL)
2084 resource = (xmlChar *) URL;
2085
2086 ret = xmlNewInputFromFile(ctxt, (const char *) resource);
2087 if ((resource != NULL) && (resource != (xmlChar *) URL))
2088 xmlFree(resource);
2089 return (ret);
2090 }
2091
2092 /**
2093 * xmlNoNetExternalEntityLoader:
2094 * @URL: the URL for the entity to load
2095 * @ID: the System ID for the entity to load
2096 * @ctxt: the context in which the entity is called or NULL
2097 *
2098 * A specific entity loader disabling network accesses, though still
2099 * allowing local catalog accesses for resolution.
2100 *
2101 * Returns a new allocated xmlParserInputPtr, or NULL.
2102 */
2103 xmlParserInputPtr
xmlNoNetExternalEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2104 xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
2105 xmlParserCtxtPtr ctxt) {
2106 xmlParserInputPtr input = NULL;
2107 xmlChar *resource = NULL;
2108
2109 #ifdef LIBXML_CATALOG_ENABLED
2110 resource = xmlResolveResourceFromCatalog(URL, ID, ctxt);
2111 #endif
2112
2113 if (resource == NULL)
2114 resource = (xmlChar *) URL;
2115
2116 if (resource != NULL) {
2117 if ((!xmlStrncasecmp(BAD_CAST resource, BAD_CAST "ftp://", 6)) ||
2118 (!xmlStrncasecmp(BAD_CAST resource, BAD_CAST "http://", 7))) {
2119 xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT,
2120 (const char *) resource);
2121 /*
2122 * Also forward the error directly to the global error
2123 * handler, which the XML::LibXML test suite expects.
2124 */
2125 __xmlIOErr(XML_FROM_IO, XML_IO_NETWORK_ATTEMPT,
2126 (const char *) resource);
2127 if (resource != (xmlChar *) URL)
2128 xmlFree(resource);
2129 return(NULL);
2130 }
2131 }
2132 input = xmlDefaultExternalEntityLoader((const char *) resource, ID, ctxt);
2133 if (resource != (xmlChar *) URL)
2134 xmlFree(resource);
2135 return(input);
2136 }
2137
/*
 * The entity loader invoked by xmlLoadExternalEntity. It starts out
 * as xmlDefaultExternalEntityLoader and is replaced with
 * xmlSetExternalEntityLoader.
 *
 * This global has to die eventually
 */
static xmlExternalEntityLoader
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2143
2144 /**
2145 * xmlSetExternalEntityLoader:
2146 * @f: the new entity resolver function
2147 *
2148 * Changes the defaultexternal entity resolver function for the application
2149 */
2150 void
xmlSetExternalEntityLoader(xmlExternalEntityLoader f)2151 xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2152 xmlCurrentExternalEntityLoader = f;
2153 }
2154
2155 /**
2156 * xmlGetExternalEntityLoader:
2157 *
2158 * Get the default external entity resolver function for the application
2159 *
2160 * Returns the xmlExternalEntityLoader function pointer
2161 */
2162 xmlExternalEntityLoader
xmlGetExternalEntityLoader(void)2163 xmlGetExternalEntityLoader(void) {
2164 return(xmlCurrentExternalEntityLoader);
2165 }
2166
2167 /**
2168 * xmlLoadExternalEntity:
2169 * @URL: the URL for the entity to load
2170 * @ID: the Public ID for the entity to load
2171 * @ctxt: the context in which the entity is called or NULL
2172 *
2173 * DEPRECATED: Use xmlNewInputURL.
2174 *
2175 * Returns the xmlParserInputPtr or NULL
2176 */
2177 xmlParserInputPtr
xmlLoadExternalEntity(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2178 xmlLoadExternalEntity(const char *URL, const char *ID,
2179 xmlParserCtxtPtr ctxt) {
2180 char *canonicFilename;
2181 xmlParserInputPtr ret;
2182
2183 if (URL == NULL)
2184 return(NULL);
2185
2186 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) URL);
2187 if (canonicFilename == NULL) {
2188 xmlCtxtErrMemory(ctxt);
2189 return(NULL);
2190 }
2191
2192 ret = xmlCurrentExternalEntityLoader(canonicFilename, ID, ctxt);
2193 xmlFree(canonicFilename);
2194 return(ret);
2195 }
2196
2197 /************************************************************************
2198 * *
2199 * Commodity functions to handle parser contexts *
2200 * *
2201 ************************************************************************/
2202
2203 /**
2204 * xmlInitSAXParserCtxt:
2205 * @ctxt: XML parser context
 * @sax:  SAX handler
2207 * @userData: user data
2208 *
2209 * Initialize a SAX parser context
2210 *
2211 * Returns 0 in case of success and -1 in case of error
2212 */
2213
2214 static int
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt,const xmlSAXHandler * sax,void * userData)2215 xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2216 void *userData)
2217 {
2218 xmlParserInputPtr input;
2219
2220 if (ctxt == NULL)
2221 return(-1);
2222
2223 if (ctxt->dict == NULL)
2224 ctxt->dict = xmlDictCreate();
2225 if (ctxt->dict == NULL)
2226 return(-1);
2227 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
2228
2229 if (ctxt->sax == NULL)
2230 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2231 if (ctxt->sax == NULL)
2232 return(-1);
2233 if (sax == NULL) {
2234 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2235 xmlSAXVersion(ctxt->sax, 2);
2236 ctxt->userData = ctxt;
2237 } else {
2238 if (sax->initialized == XML_SAX2_MAGIC) {
2239 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2240 } else {
2241 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2242 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2243 }
2244 ctxt->userData = userData ? userData : ctxt;
2245 }
2246
2247 ctxt->maxatts = 0;
2248 ctxt->atts = NULL;
2249 /* Allocate the Input stack */
2250 if (ctxt->inputTab == NULL) {
2251 ctxt->inputTab = (xmlParserInputPtr *)
2252 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2253 ctxt->inputMax = 5;
2254 }
2255 if (ctxt->inputTab == NULL)
2256 return(-1);
2257 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2258 xmlFreeInputStream(input);
2259 }
2260 ctxt->inputNr = 0;
2261 ctxt->input = NULL;
2262
2263 ctxt->version = NULL;
2264 ctxt->encoding = NULL;
2265 ctxt->standalone = -1;
2266 ctxt->hasExternalSubset = 0;
2267 ctxt->hasPErefs = 0;
2268 ctxt->html = 0;
2269 ctxt->instate = XML_PARSER_START;
2270
2271 /* Allocate the Node stack */
2272 if (ctxt->nodeTab == NULL) {
2273 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2274 ctxt->nodeMax = 10;
2275 }
2276 if (ctxt->nodeTab == NULL)
2277 return(-1);
2278 ctxt->nodeNr = 0;
2279 ctxt->node = NULL;
2280
2281 /* Allocate the Name stack */
2282 if (ctxt->nameTab == NULL) {
2283 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2284 ctxt->nameMax = 10;
2285 }
2286 if (ctxt->nameTab == NULL)
2287 return(-1);
2288 ctxt->nameNr = 0;
2289 ctxt->name = NULL;
2290
2291 /* Allocate the space stack */
2292 if (ctxt->spaceTab == NULL) {
2293 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2294 ctxt->spaceMax = 10;
2295 }
2296 if (ctxt->spaceTab == NULL)
2297 return(-1);
2298 ctxt->spaceNr = 1;
2299 ctxt->spaceMax = 10;
2300 ctxt->spaceTab[0] = -1;
2301 ctxt->space = &ctxt->spaceTab[0];
2302 ctxt->myDoc = NULL;
2303 ctxt->wellFormed = 1;
2304 ctxt->nsWellFormed = 1;
2305 ctxt->valid = 1;
2306
2307 ctxt->options = XML_PARSE_NODICT;
2308
2309 /*
2310 * Initialize some parser options from deprecated global variables.
2311 * Note that the "modern" API taking options arguments or
2312 * xmlCtxtSetOptions will ignore these defaults. They're only
2313 * relevant if old API functions like xmlParseFile are used.
2314 */
2315 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2316 if (ctxt->loadsubset) {
2317 ctxt->options |= XML_PARSE_DTDLOAD;
2318 }
2319 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2320 if (ctxt->validate) {
2321 ctxt->options |= XML_PARSE_DTDVALID;
2322 }
2323 ctxt->pedantic = xmlPedanticParserDefaultValue;
2324 if (ctxt->pedantic) {
2325 ctxt->options |= XML_PARSE_PEDANTIC;
2326 }
2327 ctxt->linenumbers = xmlLineNumbersDefaultValue;
2328 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2329 if (ctxt->keepBlanks == 0) {
2330 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2331 ctxt->options |= XML_PARSE_NOBLANKS;
2332 }
2333 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2334 if (ctxt->replaceEntities) {
2335 ctxt->options |= XML_PARSE_NOENT;
2336 }
2337 if (xmlGetWarningsDefaultValue == 0)
2338 ctxt->options |= XML_PARSE_NOWARNING;
2339
2340 ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2341 ctxt->vctxt.userData = ctxt;
2342 ctxt->vctxt.error = xmlParserValidityError;
2343 ctxt->vctxt.warning = xmlParserValidityWarning;
2344
2345 ctxt->record_info = 0;
2346 ctxt->checkIndex = 0;
2347 ctxt->inSubset = 0;
2348 ctxt->errNo = XML_ERR_OK;
2349 ctxt->depth = 0;
2350 ctxt->catalogs = NULL;
2351 ctxt->sizeentities = 0;
2352 ctxt->sizeentcopy = 0;
2353 ctxt->input_id = 1;
2354 ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2355 xmlInitNodeInfoSeq(&ctxt->node_seq);
2356
2357 if (ctxt->nsdb == NULL) {
2358 ctxt->nsdb = xmlParserNsCreate();
2359 if (ctxt->nsdb == NULL) {
2360 xmlCtxtErrMemory(ctxt);
2361 return(-1);
2362 }
2363 }
2364
2365 return(0);
2366 }
2367
2368 /**
2369 * xmlInitParserCtxt:
2370 * @ctxt: an XML parser context
2371 *
2372 * DEPRECATED: Internal function which will be made private in a future
2373 * version.
2374 *
2375 * Initialize a parser context
2376 *
2377 * Returns 0 in case of success and -1 in case of error
2378 */
2379
2380 int
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)2381 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2382 {
2383 return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2384 }
2385
2386 /**
2387 * xmlFreeParserCtxt:
2388 * @ctxt: an XML parser context
2389 *
2390 * Free all the memory used by a parser context. However the parsed
2391 * document in ctxt->myDoc is not freed.
2392 */
2393
2394 void
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)2395 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2396 {
2397 xmlParserInputPtr input;
2398
2399 if (ctxt == NULL) return;
2400
2401 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2402 xmlFreeInputStream(input);
2403 }
2404 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2405 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2406 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2407 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2408 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2409 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2410 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2411 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2412 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2413 #ifdef LIBXML_SAX1_ENABLED
2414 if ((ctxt->sax != NULL) &&
2415 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2416 #else
2417 if (ctxt->sax != NULL)
2418 #endif /* LIBXML_SAX1_ENABLED */
2419 xmlFree(ctxt->sax);
2420 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2421 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2422 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2423 if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2424 if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2425 if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2426 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2427 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2428 if (ctxt->attsDefault != NULL)
2429 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2430 if (ctxt->attsSpecial != NULL)
2431 xmlHashFree(ctxt->attsSpecial, NULL);
2432 if (ctxt->freeElems != NULL) {
2433 xmlNodePtr cur, next;
2434
2435 cur = ctxt->freeElems;
2436 while (cur != NULL) {
2437 next = cur->next;
2438 xmlFree(cur);
2439 cur = next;
2440 }
2441 }
2442 if (ctxt->freeAttrs != NULL) {
2443 xmlAttrPtr cur, next;
2444
2445 cur = ctxt->freeAttrs;
2446 while (cur != NULL) {
2447 next = cur->next;
2448 xmlFree(cur);
2449 cur = next;
2450 }
2451 }
2452 /*
2453 * cleanup the error strings
2454 */
2455 if (ctxt->lastError.message != NULL)
2456 xmlFree(ctxt->lastError.message);
2457 if (ctxt->lastError.file != NULL)
2458 xmlFree(ctxt->lastError.file);
2459 if (ctxt->lastError.str1 != NULL)
2460 xmlFree(ctxt->lastError.str1);
2461 if (ctxt->lastError.str2 != NULL)
2462 xmlFree(ctxt->lastError.str2);
2463 if (ctxt->lastError.str3 != NULL)
2464 xmlFree(ctxt->lastError.str3);
2465
2466 #ifdef LIBXML_CATALOG_ENABLED
2467 if (ctxt->catalogs != NULL)
2468 xmlCatalogFreeLocal(ctxt->catalogs);
2469 #endif
2470 xmlFree(ctxt);
2471 }
2472
2473 /**
2474 * xmlNewParserCtxt:
2475 *
2476 * Allocate and initialize a new parser context.
2477 *
2478 * Returns the xmlParserCtxtPtr or NULL
2479 */
2480
2481 xmlParserCtxtPtr
xmlNewParserCtxt(void)2482 xmlNewParserCtxt(void)
2483 {
2484 return(xmlNewSAXParserCtxt(NULL, NULL));
2485 }
2486
2487 /**
2488 * xmlNewSAXParserCtxt:
2489 * @sax: SAX handler
2490 * @userData: user data
2491 *
2492 * Allocate and initialize a new SAX parser context. If userData is NULL,
2493 * the parser context will be passed as user data.
2494 *
2495 * Available since 2.11.0. If you want support older versions,
2496 * it's best to invoke xmlNewParserCtxt and set ctxt->sax with
2497 * struct assignment.
2498 *
2499 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2500 */
2501
2502 xmlParserCtxtPtr
xmlNewSAXParserCtxt(const xmlSAXHandler * sax,void * userData)2503 xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
2504 {
2505 xmlParserCtxtPtr ctxt;
2506
2507 xmlInitParser();
2508
2509 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2510 if (ctxt == NULL)
2511 return(NULL);
2512 memset(ctxt, 0, sizeof(xmlParserCtxt));
2513 if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
2514 xmlFreeParserCtxt(ctxt);
2515 return(NULL);
2516 }
2517 return(ctxt);
2518 }
2519
2520 /************************************************************************
2521 * *
2522 * Handling of node information *
2523 * *
2524 ************************************************************************/
2525
2526 /**
2527 * xmlClearParserCtxt:
2528 * @ctxt: an XML parser context
2529 *
2530 * Clear (release owned resources) and reinitialize a parser context
2531 */
2532
2533 void
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)2534 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2535 {
2536 if (ctxt==NULL)
2537 return;
2538 xmlClearNodeInfoSeq(&ctxt->node_seq);
2539 xmlCtxtReset(ctxt);
2540 }
2541
2542
2543 /**
2544 * xmlParserFindNodeInfo:
2545 * @ctx: an XML parser context
2546 * @node: an XML node within the tree
2547 *
2548 * DEPRECATED: Don't use.
2549 *
2550 * Find the parser node info struct for a given node
2551 *
2552 * Returns an xmlParserNodeInfo block pointer or NULL
2553 */
2554 const xmlParserNodeInfo *
xmlParserFindNodeInfo(xmlParserCtxtPtr ctx,xmlNodePtr node)2555 xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
2556 {
2557 unsigned long pos;
2558
2559 if ((ctx == NULL) || (node == NULL))
2560 return (NULL);
2561 /* Find position where node should be at */
2562 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2563 if (pos < ctx->node_seq.length
2564 && ctx->node_seq.buffer[pos].node == node)
2565 return &ctx->node_seq.buffer[pos];
2566 else
2567 return NULL;
2568 }
2569
2570
2571 /**
2572 * xmlInitNodeInfoSeq:
2573 * @seq: a node info sequence pointer
2574 *
2575 * DEPRECATED: Don't use.
2576 *
2577 * -- Initialize (set to initial state) node info sequence
2578 */
2579 void
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)2580 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2581 {
2582 if (seq == NULL)
2583 return;
2584 seq->length = 0;
2585 seq->maximum = 0;
2586 seq->buffer = NULL;
2587 }
2588
2589 /**
2590 * xmlClearNodeInfoSeq:
2591 * @seq: a node info sequence pointer
2592 *
2593 * DEPRECATED: Don't use.
2594 *
2595 * -- Clear (release memory and reinitialize) node
2596 * info sequence
2597 */
2598 void
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)2599 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2600 {
2601 if (seq == NULL)
2602 return;
2603 if (seq->buffer != NULL)
2604 xmlFree(seq->buffer);
2605 xmlInitNodeInfoSeq(seq);
2606 }
2607
2608 /**
2609 * xmlParserFindNodeInfoIndex:
2610 * @seq: a node info sequence pointer
2611 * @node: an XML node pointer
2612 *
2613 * DEPRECATED: Don't use.
2614 *
2615 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2616 * the given node is or should be at in a sorted sequence
2617 *
2618 * Returns a long indicating the position of the record
2619 */
2620 unsigned long
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,xmlNodePtr node)2621 xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
2622 xmlNodePtr node)
2623 {
2624 unsigned long upper, lower, middle;
2625 int found = 0;
2626
2627 if ((seq == NULL) || (node == NULL))
2628 return ((unsigned long) -1);
2629
2630 /* Do a binary search for the key */
2631 lower = 1;
2632 upper = seq->length;
2633 middle = 0;
2634 while (lower <= upper && !found) {
2635 middle = lower + (upper - lower) / 2;
2636 if (node == seq->buffer[middle - 1].node)
2637 found = 1;
2638 else if (node < seq->buffer[middle - 1].node)
2639 upper = middle - 1;
2640 else
2641 lower = middle + 1;
2642 }
2643
2644 /* Return position */
2645 if (middle == 0 || seq->buffer[middle - 1].node < node)
2646 return middle;
2647 else
2648 return middle - 1;
2649 }
2650
2651
2652 /**
2653 * xmlParserAddNodeInfo:
2654 * @ctxt: an XML parser context
2655 * @info: a node info sequence pointer
2656 *
2657 * DEPRECATED: Don't use.
2658 *
2659 * Insert node info record into the sorted sequence
2660 */
2661 void
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,xmlParserNodeInfoPtr info)2662 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2663 xmlParserNodeInfoPtr info)
2664 {
2665 unsigned long pos;
2666
2667 if ((ctxt == NULL) || (info == NULL)) return;
2668
2669 /* Find pos and check to see if node is already in the sequence */
2670 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2671 info->node);
2672
2673 if ((pos < ctxt->node_seq.length) &&
2674 (ctxt->node_seq.buffer != NULL) &&
2675 (ctxt->node_seq.buffer[pos].node == info->node)) {
2676 ctxt->node_seq.buffer[pos] = *info;
2677 }
2678
2679 /* Otherwise, we need to add new node to buffer */
2680 else {
2681 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2682 (ctxt->node_seq.buffer == NULL)) {
2683 xmlParserNodeInfo *tmp_buffer;
2684 unsigned int byte_size;
2685
2686 if (ctxt->node_seq.maximum == 0)
2687 ctxt->node_seq.maximum = 2;
2688 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2689 (2 * ctxt->node_seq.maximum));
2690
2691 if (ctxt->node_seq.buffer == NULL)
2692 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2693 else
2694 tmp_buffer =
2695 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2696 byte_size);
2697
2698 if (tmp_buffer == NULL) {
2699 xmlCtxtErrMemory(ctxt);
2700 return;
2701 }
2702 ctxt->node_seq.buffer = tmp_buffer;
2703 ctxt->node_seq.maximum *= 2;
2704 }
2705
2706 /* If position is not at end, move elements out of the way */
2707 if (pos != ctxt->node_seq.length) {
2708 unsigned long i;
2709
2710 for (i = ctxt->node_seq.length; i > pos; i--)
2711 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2712 }
2713
2714 /* Copy element and increase length */
2715 ctxt->node_seq.buffer[pos] = *info;
2716 ctxt->node_seq.length++;
2717 }
2718 }
2719
2720 /************************************************************************
2721 * *
2722 * Defaults settings *
2723 * *
2724 ************************************************************************/
2725 /**
2726 * xmlPedanticParserDefault:
2727 * @val: int 0 or 1
2728 *
2729 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2730 *
2731 * Set and return the previous value for enabling pedantic warnings.
2732 *
2733 * Returns the last value for 0 for no substitution, 1 for substitution.
2734 */
2735
2736 int
xmlPedanticParserDefault(int val)2737 xmlPedanticParserDefault(int val) {
2738 int old = xmlPedanticParserDefaultValue;
2739
2740 xmlPedanticParserDefaultValue = val;
2741 return(old);
2742 }
2743
2744 /**
2745 * xmlLineNumbersDefault:
2746 * @val: int 0 or 1
2747 *
2748 * DEPRECATED: The modern options API always enables line numbers.
2749 *
2750 * Set and return the previous value for enabling line numbers in elements
2751 * contents. This may break on old application and is turned off by default.
2752 *
2753 * Returns the last value for 0 for no substitution, 1 for substitution.
2754 */
2755
2756 int
xmlLineNumbersDefault(int val)2757 xmlLineNumbersDefault(int val) {
2758 int old = xmlLineNumbersDefaultValue;
2759
2760 xmlLineNumbersDefaultValue = val;
2761 return(old);
2762 }
2763
2764 /**
2765 * xmlSubstituteEntitiesDefault:
2766 * @val: int 0 or 1
2767 *
2768 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2769 *
2770 * Set and return the previous value for default entity support.
2771 * Initially the parser always keep entity references instead of substituting
2772 * entity values in the output. This function has to be used to change the
2773 * default parser behavior
2774 * SAX::substituteEntities() has to be used for changing that on a file by
2775 * file basis.
2776 *
2777 * Returns the last value for 0 for no substitution, 1 for substitution.
2778 */
2779
2780 int
xmlSubstituteEntitiesDefault(int val)2781 xmlSubstituteEntitiesDefault(int val) {
2782 int old = xmlSubstituteEntitiesDefaultValue;
2783
2784 xmlSubstituteEntitiesDefaultValue = val;
2785 return(old);
2786 }
2787
2788 /**
2789 * xmlKeepBlanksDefault:
2790 * @val: int 0 or 1
2791 *
2792 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2793 *
2794 * Set and return the previous value for default blanks text nodes support.
2795 * The 1.x version of the parser used an heuristic to try to detect
2796 * ignorable white spaces. As a result the SAX callback was generating
2797 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2798 * using the DOM output text nodes containing those blanks were not generated.
2799 * The 2.x and later version will switch to the XML standard way and
2800 * ignorableWhitespace() are only generated when running the parser in
2801 * validating mode and when the current element doesn't allow CDATA or
2802 * mixed content.
2803 * This function is provided as a way to force the standard behavior
2804 * on 1.X libs and to switch back to the old mode for compatibility when
2805 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2806 * by using xmlIsBlankNode() commodity function to detect the "empty"
2807 * nodes generated.
2808 * This value also affect autogeneration of indentation when saving code
2809 * if blanks sections are kept, indentation is not generated.
2810 *
2811 * Returns the last value for 0 for no substitution, 1 for substitution.
2812 */
2813
2814 int
xmlKeepBlanksDefault(int val)2815 xmlKeepBlanksDefault(int val) {
2816 int old = xmlKeepBlanksDefaultValue;
2817
2818 xmlKeepBlanksDefaultValue = val;
2819 #ifdef LIBXML_OUTPUT_ENABLED
2820 if (!val)
2821 xmlIndentTreeOutput = 1;
2822 #endif
2823 return(old);
2824 }
2825
2826