xref: /aosp_15_r20/external/libxml2/xpointer.c (revision 7c5688314b92172186c154356a6374bf7684c3ca)
/*
 * xpointer.c : Code to handle XML Pointer
 *
 * Base implementation was made according to
 * W3C Candidate Recommendation 7 June 2000
 * http://www.w3.org/TR/2000/CR-xptr-20000607
 *
 * Added support for the element() scheme described in:
 * W3C Proposed Recommendation 13 November 2002
 * http://www.w3.org/TR/2002/PR-xptr-element-20021113/
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
16 
17 /* To avoid EBCDIC trouble when parsing on zOS */
18 #if defined(__MVS__)
19 #pragma convert("ISO8859-1")
20 #endif
21 
22 #define IN_LIBXML
23 #include "libxml.h"
24 
/*
 * TODO: better handling of error cases, the full expression should
 *       be parsed beforehand instead of being evaluated progressively
 * TODO: Access into entity references is not supported yet ...
 *       need a start to be able to pop out of entity refs since
 *       parent is the entity declaration, not the ref.
 */
32 
33 #include <string.h>
34 #include <libxml/xpointer.h>
35 #include <libxml/xmlmemory.h>
36 #include <libxml/parserInternals.h>
37 #include <libxml/uri.h>
38 #include <libxml/xpath.h>
39 #include <libxml/xpathInternals.h>
40 #include <libxml/xmlerror.h>
41 
42 #ifdef LIBXML_XPTR_ENABLED
43 
44 /* Add support of the xmlns() xpointer scheme to initialize the namespaces */
45 #define XPTR_XMLNS_SCHEME
46 
47 #include "private/error.h"
48 #include "private/xpath.h"
49 
50 /************************************************************************
51  *									*
52  *		Some factorized error routines				*
53  *									*
54  ************************************************************************/
55 
56 /**
57  * xmlXPtrErr:
58  * @ctxt:  an XPTR evaluation context
59  * @extra:  extra information
60  *
61  * Handle an XPointer error
62  */
63 static void LIBXML_ATTR_FORMAT(3,0)
xmlXPtrErr(xmlXPathParserContextPtr ctxt,int code,const char * msg,const xmlChar * extra)64 xmlXPtrErr(xmlXPathParserContextPtr ctxt, int code,
65            const char * msg, const xmlChar *extra)
66 {
67     xmlStructuredErrorFunc serror = NULL;
68     void *data = NULL;
69     xmlNodePtr node = NULL;
70     int res;
71 
72     if (ctxt == NULL)
73         return;
74     /* Only report the first error */
75     if (ctxt->error != 0)
76         return;
77 
78     ctxt->error = code;
79 
80     if (ctxt->context != NULL) {
81         xmlErrorPtr err = &ctxt->context->lastError;
82 
83         /* cleanup current last error */
84         xmlResetError(err);
85 
86         err->domain = XML_FROM_XPOINTER;
87         err->code = code;
88         err->level = XML_ERR_ERROR;
89         err->str1 = (char *) xmlStrdup(ctxt->base);
90         if (err->str1 == NULL) {
91             xmlXPathPErrMemory(ctxt);
92             return;
93         }
94         err->int1 = ctxt->cur - ctxt->base;
95         err->node = ctxt->context->debugNode;
96 
97         serror = ctxt->context->error;
98         data = ctxt->context->userData;
99         node = ctxt->context->debugNode;
100     }
101 
102     res = xmlRaiseError(serror, NULL, data, NULL, node,
103                         XML_FROM_XPOINTER, code, XML_ERR_ERROR, NULL, 0,
104                         (const char *) extra, (const char *) ctxt->base,
105                         NULL, ctxt->cur - ctxt->base, 0,
106                         msg, extra);
107     if (res < 0)
108         xmlXPathPErrMemory(ctxt);
109 }
110 
111 /************************************************************************
112  *									*
113  *		A few helper functions for child sequences		*
114  *									*
115  ************************************************************************/
116 
117 /**
118  * xmlXPtrGetNthChild:
119  * @cur:  the node
120  * @no:  the child number
121  *
122  * Returns the @no'th element child of @cur or NULL
123  */
124 static xmlNodePtr
xmlXPtrGetNthChild(xmlNodePtr cur,int no)125 xmlXPtrGetNthChild(xmlNodePtr cur, int no) {
126     int i;
127     if ((cur == NULL) || (cur->type == XML_NAMESPACE_DECL))
128 	return(cur);
129     cur = cur->children;
130     for (i = 0;i <= no;cur = cur->next) {
131 	if (cur == NULL)
132 	    return(cur);
133 	if ((cur->type == XML_ELEMENT_NODE) ||
134 	    (cur->type == XML_DOCUMENT_NODE) ||
135 	    (cur->type == XML_HTML_DOCUMENT_NODE)) {
136 	    i++;
137 	    if (i == no)
138 		break;
139 	}
140     }
141     return(cur);
142 }
143 
144 /************************************************************************
145  *									*
146  *			The parser					*
147  *									*
148  ************************************************************************/
149 
150 static void xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name);
151 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. they all assume that a variable named ctxt, pointing
 * to the parser context, is in scope at the point of use.
 *
 *   CUR         the xmlChar at the current position, i.e. one 8 bit value
 *               in ISO-Latin or UTF-8. No decoding is done, so it should
 *               only be compared against ASCII values.
 *   NXT(n)      the xmlChar n positions after the current one. Like CUR it
 *               should only be used to compare against ASCII values.
 *   SKIP(n)     advance the current position by n xmlChar without checking
 *               for the end of the string; only use it to skip ASCII
 *               strings that were already matched.
 *   SKIP_BLANKS advance over any run of blank characters.
 *   CURRENT     same value as CUR.
 *   NEXT        advance by one xmlChar unless the current one is the
 *               terminating 0. The expression yields the position that was
 *               current before the advance.
 */

#define CUR (*ctxt->cur)
#define SKIP(val) (ctxt->cur += (val))
#define NXT(val) (ctxt->cur[(val)])

#define SKIP_BLANKS							\
    while (IS_BLANK_CH(*(ctxt->cur))) NEXT

#define CURRENT (*ctxt->cur)
#define NEXT ((*ctxt->cur) ?  ctxt->cur++: ctxt->cur)
183 
184 /*
185  * xmlXPtrGetChildNo:
186  * @ctxt:  the XPointer Parser context
187  * @index:  the child number
188  *
189  * Move the current node of the nodeset on the stack to the
190  * given child if found
191  */
192 static void
xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt,int indx)193 xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt, int indx) {
194     xmlNodePtr cur = NULL;
195     xmlXPathObjectPtr obj;
196     xmlNodeSetPtr oldset;
197 
198     CHECK_TYPE(XPATH_NODESET);
199     obj = valuePop(ctxt);
200     oldset = obj->nodesetval;
201     if ((indx <= 0) || (oldset == NULL) || (oldset->nodeNr != 1)) {
202 	xmlXPathFreeObject(obj);
203 	valuePush(ctxt, xmlXPathNewNodeSet(NULL));
204 	return;
205     }
206     cur = xmlXPtrGetNthChild(oldset->nodeTab[0], indx);
207     if (cur == NULL) {
208 	xmlXPathFreeObject(obj);
209 	valuePush(ctxt, xmlXPathNewNodeSet(NULL));
210 	return;
211     }
212     oldset->nodeTab[0] = cur;
213     valuePush(ctxt, obj);
214 }
215 
216 /**
217  * xmlXPtrEvalXPtrPart:
218  * @ctxt:  the XPointer Parser context
219  * @name:  the preparsed Scheme for the XPtrPart
220  *
221  * XPtrPart ::= 'xpointer' '(' XPtrExpr ')'
222  *            | Scheme '(' SchemeSpecificExpr ')'
223  *
224  * Scheme   ::=  NCName - 'xpointer' [VC: Non-XPointer schemes]
225  *
226  * SchemeSpecificExpr ::= StringWithBalancedParens
227  *
228  * StringWithBalancedParens ::=
229  *              [^()]* ('(' StringWithBalancedParens ')' [^()]*)*
230  *              [VC: Parenthesis escaping]
231  *
232  * XPtrExpr ::= Expr [VC: Parenthesis escaping]
233  *
234  * VC: Parenthesis escaping:
235  *   The end of an XPointer part is signaled by the right parenthesis ")"
236  *   character that is balanced with the left parenthesis "(" character
237  *   that began the part. Any unbalanced parenthesis character inside the
238  *   expression, even within literals, must be escaped with a circumflex (^)
239  *   character preceding it. If the expression contains any literal
240  *   occurrences of the circumflex, each must be escaped with an additional
241  *   circumflex (that is, ^^). If the unescaped parentheses in the expression
242  *   are not balanced, a syntax error results.
243  *
244  * Parse and evaluate an XPtrPart. Basically it generates the unescaped
245  * string and if the scheme is 'xpointer' it will call the XPath interpreter.
246  *
247  * TODO: there is no new scheme registration mechanism
248  */
249 
250 static void
xmlXPtrEvalXPtrPart(xmlXPathParserContextPtr ctxt,xmlChar * name)251 xmlXPtrEvalXPtrPart(xmlXPathParserContextPtr ctxt, xmlChar *name) {
252     xmlChar *buffer, *cur;
253     int len;
254     int level;
255 
256     if (name == NULL)
257     name = xmlXPathParseName(ctxt);
258     if (name == NULL)
259 	XP_ERROR(XPATH_EXPR_ERROR);
260 
261     if (CUR != '(') {
262         xmlFree(name);
263 	XP_ERROR(XPATH_EXPR_ERROR);
264     }
265     NEXT;
266     level = 1;
267 
268     len = xmlStrlen(ctxt->cur);
269     len++;
270     buffer = xmlMalloc(len);
271     if (buffer == NULL) {
272         xmlXPathPErrMemory(ctxt);
273         xmlFree(name);
274 	return;
275     }
276 
277     cur = buffer;
278     while (CUR != 0) {
279 	if (CUR == ')') {
280 	    level--;
281 	    if (level == 0) {
282 		NEXT;
283 		break;
284 	    }
285 	} else if (CUR == '(') {
286 	    level++;
287 	} else if (CUR == '^') {
288             if ((NXT(1) == ')') || (NXT(1) == '(') || (NXT(1) == '^')) {
289                 NEXT;
290             }
291 	}
292         *cur++ = CUR;
293 	NEXT;
294     }
295     *cur = 0;
296 
297     if ((level != 0) && (CUR == 0)) {
298         xmlFree(name);
299 	xmlFree(buffer);
300 	XP_ERROR(XPTR_SYNTAX_ERROR);
301     }
302 
303     if (xmlStrEqual(name, (xmlChar *) "xpointer") ||
304         xmlStrEqual(name, (xmlChar *) "xpath1")) {
305 	const xmlChar *oldBase = ctxt->base;
306 	const xmlChar *oldCur = ctxt->cur;
307 
308 	ctxt->cur = ctxt->base = buffer;
309 	/*
310 	 * To evaluate an xpointer scheme element (4.3) we need:
311 	 *   context initialized to the root
312 	 *   context position initialized to 1
313 	 *   context size initialized to 1
314 	 */
315 	ctxt->context->node = (xmlNodePtr)ctxt->context->doc;
316 	ctxt->context->proximityPosition = 1;
317 	ctxt->context->contextSize = 1;
318 	xmlXPathEvalExpr(ctxt);
319 	ctxt->base = oldBase;
320         ctxt->cur = oldCur;
321     } else if (xmlStrEqual(name, (xmlChar *) "element")) {
322 	const xmlChar *oldBase = ctxt->base;
323 	const xmlChar *oldCur = ctxt->cur;
324 	xmlChar *name2;
325 
326 	ctxt->cur = ctxt->base = buffer;
327 	if (buffer[0] == '/') {
328 	    xmlXPathRoot(ctxt);
329 	    xmlXPtrEvalChildSeq(ctxt, NULL);
330 	} else {
331 	    name2 = xmlXPathParseName(ctxt);
332 	    if (name2 == NULL) {
333                 ctxt->base = oldBase;
334                 ctxt->cur = oldCur;
335 		xmlFree(buffer);
336                 xmlFree(name);
337 		XP_ERROR(XPATH_EXPR_ERROR);
338 	    }
339 	    xmlXPtrEvalChildSeq(ctxt, name2);
340 	}
341 	ctxt->base = oldBase;
342         ctxt->cur = oldCur;
343 #ifdef XPTR_XMLNS_SCHEME
344     } else if (xmlStrEqual(name, (xmlChar *) "xmlns")) {
345 	const xmlChar *oldBase = ctxt->base;
346 	const xmlChar *oldCur = ctxt->cur;
347 	xmlChar *prefix;
348 
349 	ctxt->cur = ctxt->base = buffer;
350         prefix = xmlXPathParseNCName(ctxt);
351 	if (prefix == NULL) {
352             ctxt->base = oldBase;
353             ctxt->cur = oldCur;
354 	    xmlFree(buffer);
355 	    xmlFree(name);
356 	    XP_ERROR(XPTR_SYNTAX_ERROR);
357 	}
358 	SKIP_BLANKS;
359 	if (CUR != '=') {
360             ctxt->base = oldBase;
361             ctxt->cur = oldCur;
362 	    xmlFree(prefix);
363 	    xmlFree(buffer);
364 	    xmlFree(name);
365 	    XP_ERROR(XPTR_SYNTAX_ERROR);
366 	}
367 	NEXT;
368 	SKIP_BLANKS;
369 
370 	if (xmlXPathRegisterNs(ctxt->context, prefix, ctxt->cur) < 0)
371             xmlXPathPErrMemory(ctxt);
372         ctxt->base = oldBase;
373         ctxt->cur = oldCur;
374 	xmlFree(prefix);
375 #endif /* XPTR_XMLNS_SCHEME */
376     } else {
377         xmlXPtrErr(ctxt, XML_XPTR_UNKNOWN_SCHEME,
378 		   "unsupported scheme '%s'\n", name);
379     }
380     xmlFree(buffer);
381     xmlFree(name);
382 }
383 
384 /**
385  * xmlXPtrEvalFullXPtr:
386  * @ctxt:  the XPointer Parser context
387  * @name:  the preparsed Scheme for the first XPtrPart
388  *
389  * FullXPtr ::= XPtrPart (S? XPtrPart)*
390  *
391  * As the specs says:
392  * -----------
393  * When multiple XPtrParts are provided, they must be evaluated in
394  * left-to-right order. If evaluation of one part fails, the nexti
395  * is evaluated. The following conditions cause XPointer part failure:
396  *
397  * - An unknown scheme
398  * - A scheme that does not locate any sub-resource present in the resource
399  * - A scheme that is not applicable to the media type of the resource
400  *
401  * The XPointer application must consume a failed XPointer part and
402  * attempt to evaluate the next one, if any. The result of the first
403  * XPointer part whose evaluation succeeds is taken to be the fragment
404  * located by the XPointer as a whole. If all the parts fail, the result
405  * for the XPointer as a whole is a sub-resource error.
406  * -----------
407  *
408  * Parse and evaluate a Full XPtr i.e. possibly a cascade of XPath based
409  * expressions or other schemes.
410  */
411 static void
xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt,xmlChar * name)412 xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt, xmlChar *name) {
413     if (name == NULL)
414     name = xmlXPathParseName(ctxt);
415     if (name == NULL)
416 	XP_ERROR(XPATH_EXPR_ERROR);
417     while (name != NULL) {
418 	ctxt->error = XPATH_EXPRESSION_OK;
419 	xmlXPtrEvalXPtrPart(ctxt, name);
420 
421 	/* in case of syntax error, break here */
422 	if ((ctxt->error != XPATH_EXPRESSION_OK) &&
423             (ctxt->error != XML_XPTR_UNKNOWN_SCHEME))
424 	    return;
425 
426 	/*
427 	 * If the returned value is a non-empty nodeset
428 	 * or location set, return here.
429 	 */
430 	if (ctxt->value != NULL) {
431 	    xmlXPathObjectPtr obj = ctxt->value;
432 
433 	    switch (obj->type) {
434 		case XPATH_NODESET: {
435 		    xmlNodeSetPtr loc = ctxt->value->nodesetval;
436 		    if ((loc != NULL) && (loc->nodeNr > 0))
437 			return;
438 		    break;
439 		}
440 		default:
441 		    break;
442 	    }
443 
444 	    /*
445 	     * Evaluating to improper values is equivalent to
446 	     * a sub-resource error, clean-up the stack
447 	     */
448 	    do {
449 		obj = valuePop(ctxt);
450 		if (obj != NULL) {
451 		    xmlXPathFreeObject(obj);
452 		}
453 	    } while (obj != NULL);
454 	}
455 
456 	/*
457 	 * Is there another XPointer part.
458 	 */
459 	SKIP_BLANKS;
460 	name = xmlXPathParseName(ctxt);
461     }
462 }
463 
464 /**
465  * xmlXPtrEvalChildSeq:
466  * @ctxt:  the XPointer Parser context
467  * @name:  a possible ID name of the child sequence
468  *
469  *  ChildSeq ::= '/1' ('/' [0-9]*)*
470  *             | Name ('/' [0-9]*)+
471  *
472  * Parse and evaluate a Child Sequence. This routine also handle the
473  * case of a Bare Name used to get a document ID.
474  */
475 static void
xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt,xmlChar * name)476 xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name) {
477     /*
478      * XPointer don't allow by syntax to address in multirooted trees
479      * this might prove useful in some cases, warn about it.
480      */
481     if ((name == NULL) && (CUR == '/') && (NXT(1) != '1')) {
482         xmlXPtrErr(ctxt, XML_XPTR_CHILDSEQ_START,
483 		   "warning: ChildSeq not starting by /1\n", NULL);
484     }
485 
486     if (name != NULL) {
487 	valuePush(ctxt, xmlXPathNewString(name));
488 	xmlFree(name);
489 	xmlXPathIdFunction(ctxt, 1);
490 	CHECK_ERROR;
491     }
492 
493     while (CUR == '/') {
494 	int child = 0, overflow = 0;
495 	NEXT;
496 
497 	while ((CUR >= '0') && (CUR <= '9')) {
498             int d = CUR - '0';
499             if (child > INT_MAX / 10)
500                 overflow = 1;
501             else
502                 child *= 10;
503             if (child > INT_MAX - d)
504                 overflow = 1;
505             else
506                 child += d;
507 	    NEXT;
508 	}
509         if (overflow)
510             child = 0;
511 	xmlXPtrGetChildNo(ctxt, child);
512     }
513 }
514 
515 
516 /**
517  * xmlXPtrEvalXPointer:
518  * @ctxt:  the XPointer Parser context
519  *
520  *  XPointer ::= Name
521  *             | ChildSeq
522  *             | FullXPtr
523  *
524  * Parse and evaluate an XPointer
525  */
526 static void
xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt)527 xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt) {
528     if (ctxt->valueTab == NULL) {
529 	/* Allocate the value stack */
530 	ctxt->valueTab = (xmlXPathObjectPtr *)
531 			 xmlMalloc(10 * sizeof(xmlXPathObjectPtr));
532 	if (ctxt->valueTab == NULL) {
533 	    xmlXPathPErrMemory(ctxt);
534 	    return;
535 	}
536 	ctxt->valueNr = 0;
537 	ctxt->valueMax = 10;
538 	ctxt->value = NULL;
539     }
540     SKIP_BLANKS;
541     if (CUR == '/') {
542 	xmlXPathRoot(ctxt);
543         xmlXPtrEvalChildSeq(ctxt, NULL);
544     } else {
545 	xmlChar *name;
546 
547 	name = xmlXPathParseName(ctxt);
548 	if (name == NULL)
549 	    XP_ERROR(XPATH_EXPR_ERROR);
550 	if (CUR == '(') {
551 	    xmlXPtrEvalFullXPtr(ctxt, name);
552 	    /* Short evaluation */
553 	    return;
554 	} else {
555 	    /* this handle both Bare Names and Child Sequences */
556 	    xmlXPtrEvalChildSeq(ctxt, name);
557 	}
558     }
559     SKIP_BLANKS;
560     if (CUR != 0)
561 	XP_ERROR(XPATH_EXPR_ERROR);
562 }
563 
564 
565 /************************************************************************
566  *									*
567  *			General routines				*
568  *									*
569  ************************************************************************/
570 
571 /**
572  * xmlXPtrNewContext:
573  * @doc:  the XML document
574  * @here:  the node that directly contains the XPointer being evaluated or NULL
575  * @origin:  the element from which a user or program initiated traversal of
576  *           the link, or NULL.
577  *
578  * Create a new XPointer context
579  *
580  * Returns the xmlXPathContext just allocated.
581  */
582 xmlXPathContextPtr
xmlXPtrNewContext(xmlDocPtr doc,xmlNodePtr here,xmlNodePtr origin)583 xmlXPtrNewContext(xmlDocPtr doc, xmlNodePtr here, xmlNodePtr origin) {
584     xmlXPathContextPtr ret;
585     (void) here;
586     (void) origin;
587 
588     ret = xmlXPathNewContext(doc);
589     if (ret == NULL)
590 	return(ret);
591 
592     return(ret);
593 }
594 
595 /**
596  * xmlXPtrEval:
597  * @str:  the XPointer expression
598  * @ctx:  the XPointer context
599  *
600  * Evaluate the XPath Location Path in the given context.
601  *
602  * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL.
603  *         the caller has to free the object.
604  */
605 xmlXPathObjectPtr
xmlXPtrEval(const xmlChar * str,xmlXPathContextPtr ctx)606 xmlXPtrEval(const xmlChar *str, xmlXPathContextPtr ctx) {
607     xmlXPathParserContextPtr ctxt;
608     xmlXPathObjectPtr res = NULL, tmp;
609     xmlXPathObjectPtr init = NULL;
610     int stack = 0;
611 
612     xmlInitParser();
613 
614     if ((ctx == NULL) || (str == NULL))
615 	return(NULL);
616 
617     xmlResetError(&ctx->lastError);
618 
619     ctxt = xmlXPathNewParserContext(str, ctx);
620     if (ctxt == NULL) {
621         xmlXPathErrMemory(ctx);
622 	return(NULL);
623     }
624     xmlXPtrEvalXPointer(ctxt);
625     if (ctx->lastError.code != XML_ERR_OK)
626         goto error;
627 
628     if ((ctxt->value != NULL) &&
629 	(ctxt->value->type != XPATH_NODESET)) {
630         xmlXPtrErr(ctxt, XML_XPTR_EVAL_FAILED,
631 		"xmlXPtrEval: evaluation failed to return a node set\n",
632 		   NULL);
633     } else {
634 	res = valuePop(ctxt);
635     }
636 
637     do {
638         tmp = valuePop(ctxt);
639 	if (tmp != NULL) {
640 	    if (tmp != init) {
641 		if (tmp->type == XPATH_NODESET) {
642 		    /*
643 		     * Evaluation may push a root nodeset which is unused
644 		     */
645 		    xmlNodeSetPtr set;
646 		    set = tmp->nodesetval;
647 		    if ((set == NULL) || (set->nodeNr != 1) ||
648 			(set->nodeTab[0] != (xmlNodePtr) ctx->doc))
649 			stack++;
650 		} else
651 		    stack++;
652 	    }
653 	    xmlXPathFreeObject(tmp);
654         }
655     } while (tmp != NULL);
656     if (stack != 0) {
657         xmlXPtrErr(ctxt, XML_XPTR_EXTRA_OBJECTS,
658 		   "xmlXPtrEval: object(s) left on the eval stack\n",
659 		   NULL);
660     }
661     if (ctx->lastError.code != XML_ERR_OK) {
662 	xmlXPathFreeObject(res);
663 	res = NULL;
664     }
665 
666 error:
667     xmlXPathFreeParserContext(ctxt);
668     return(res);
669 }
670 
671 #endif
672 
673