1 /*
2 * xpointer.c : Code to handle XML Pointer
3 *
 * Base implementation was made according to
5 * W3C Candidate Recommendation 7 June 2000
6 * http://www.w3.org/TR/2000/CR-xptr-20000607
7 *
8 * Added support for the element() scheme described in:
9 * W3C Proposed Recommendation 13 November 2002
10 * http://www.w3.org/TR/2002/PR-xptr-element-20021113/
11 *
12 * See Copyright for the status of this software.
13 *
14 * [email protected]
15 */
16
17 /* To avoid EBCDIC trouble when parsing on zOS */
18 #if defined(__MVS__)
19 #pragma convert("ISO8859-1")
20 #endif
21
22 #define IN_LIBXML
23 #include "libxml.h"
24
25 /*
26 * TODO: better handling of error cases, the full expression should
27 * be parsed beforehand instead of a progressive evaluation
 * TODO: Access into entity references is not supported now ...
29 * need a start to be able to pop out of entities refs since
30 * parent is the entity declaration, not the ref.
31 */
32
33 #include <string.h>
34 #include <libxml/xpointer.h>
35 #include <libxml/xmlmemory.h>
36 #include <libxml/parserInternals.h>
37 #include <libxml/uri.h>
38 #include <libxml/xpath.h>
39 #include <libxml/xpathInternals.h>
40 #include <libxml/xmlerror.h>
41
42 #ifdef LIBXML_XPTR_ENABLED
43
44 /* Add support of the xmlns() xpointer scheme to initialize the namespaces */
45 #define XPTR_XMLNS_SCHEME
46
47 #include "private/error.h"
48 #include "private/xpath.h"
49
50 /************************************************************************
51 * *
52 * Some factorized error routines *
53 * *
54 ************************************************************************/
55
56 /**
57 * xmlXPtrErr:
58 * @ctxt: an XPTR evaluation context
59 * @extra: extra information
60 *
61 * Handle an XPointer error
62 */
63 static void LIBXML_ATTR_FORMAT(3,0)
xmlXPtrErr(xmlXPathParserContextPtr ctxt,int code,const char * msg,const xmlChar * extra)64 xmlXPtrErr(xmlXPathParserContextPtr ctxt, int code,
65 const char * msg, const xmlChar *extra)
66 {
67 xmlStructuredErrorFunc serror = NULL;
68 void *data = NULL;
69 xmlNodePtr node = NULL;
70 int res;
71
72 if (ctxt == NULL)
73 return;
74 /* Only report the first error */
75 if (ctxt->error != 0)
76 return;
77
78 ctxt->error = code;
79
80 if (ctxt->context != NULL) {
81 xmlErrorPtr err = &ctxt->context->lastError;
82
83 /* cleanup current last error */
84 xmlResetError(err);
85
86 err->domain = XML_FROM_XPOINTER;
87 err->code = code;
88 err->level = XML_ERR_ERROR;
89 err->str1 = (char *) xmlStrdup(ctxt->base);
90 if (err->str1 == NULL) {
91 xmlXPathPErrMemory(ctxt);
92 return;
93 }
94 err->int1 = ctxt->cur - ctxt->base;
95 err->node = ctxt->context->debugNode;
96
97 serror = ctxt->context->error;
98 data = ctxt->context->userData;
99 node = ctxt->context->debugNode;
100 }
101
102 res = xmlRaiseError(serror, NULL, data, NULL, node,
103 XML_FROM_XPOINTER, code, XML_ERR_ERROR, NULL, 0,
104 (const char *) extra, (const char *) ctxt->base,
105 NULL, ctxt->cur - ctxt->base, 0,
106 msg, extra);
107 if (res < 0)
108 xmlXPathPErrMemory(ctxt);
109 }
110
111 /************************************************************************
112 * *
113 * A few helper functions for child sequences *
114 * *
115 ************************************************************************/
116
117 /**
118 * xmlXPtrGetNthChild:
119 * @cur: the node
120 * @no: the child number
121 *
122 * Returns the @no'th element child of @cur or NULL
123 */
124 static xmlNodePtr
xmlXPtrGetNthChild(xmlNodePtr cur,int no)125 xmlXPtrGetNthChild(xmlNodePtr cur, int no) {
126 int i;
127 if ((cur == NULL) || (cur->type == XML_NAMESPACE_DECL))
128 return(cur);
129 cur = cur->children;
130 for (i = 0;i <= no;cur = cur->next) {
131 if (cur == NULL)
132 return(cur);
133 if ((cur->type == XML_ELEMENT_NODE) ||
134 (cur->type == XML_DOCUMENT_NODE) ||
135 (cur->type == XML_HTML_DOCUMENT_NODE)) {
136 i++;
137 if (i == no)
138 break;
139 }
140 }
141 return(cur);
142 }
143
144 /************************************************************************
145 * *
146 * The parser *
147 * *
148 ************************************************************************/
149
/* Forward declaration: xmlXPtrEvalXPtrPart calls it before its definition. */
static void xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name);

/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one needs to make assumptions about the context to use
 * them: every macro below expands to code referencing a variable named
 * `ctxt` whose `cur` member points into the string being parsed.
 *
 *   CUR         the current xmlChar value, i.e. a single 8 bit value
 *               in ISO-Latin or UTF-8.
 *               This should be used internally by the parser
 *               only to compare to ASCII values, otherwise it would break
 *               when running with UTF-8 encoding.
 *   NXT(n)      the n'th next xmlChar. Same as CUR, it should be used only
 *               to compare against ASCII based substrings.
 *   SKIP(n)     skip n xmlChar; must also be used only to skip ASCII defined
 *               strings within the parser. The expansion is a bare
 *               `+=` expression, so use it as a standalone statement.
 *   SKIP_BLANKS advance past every consecutive character for which
 *               IS_BLANK_CH is true.
 *   CURRENT     defined identically to CUR in this file: it yields the
 *               current xmlChar and performs no UTF-8 decoding.
 *   NEXT        advance by one xmlChar, unless the current xmlChar is the
 *               terminating 0, in which case the position is left alone.
 *               The expression evaluates to a pointer (the position before
 *               the advance).
 */

#define CUR (*ctxt->cur)
#define SKIP(val) ctxt->cur += (val)
#define NXT(val) ctxt->cur[(val)]

#define SKIP_BLANKS							\
    while (IS_BLANK_CH(*(ctxt->cur))) NEXT

#define CURRENT (*ctxt->cur)
#define NEXT ((*ctxt->cur) ?  ctxt->cur++: ctxt->cur)
183
184 /*
185 * xmlXPtrGetChildNo:
186 * @ctxt: the XPointer Parser context
187 * @index: the child number
188 *
189 * Move the current node of the nodeset on the stack to the
190 * given child if found
191 */
192 static void
xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt,int indx)193 xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt, int indx) {
194 xmlNodePtr cur = NULL;
195 xmlXPathObjectPtr obj;
196 xmlNodeSetPtr oldset;
197
198 CHECK_TYPE(XPATH_NODESET);
199 obj = valuePop(ctxt);
200 oldset = obj->nodesetval;
201 if ((indx <= 0) || (oldset == NULL) || (oldset->nodeNr != 1)) {
202 xmlXPathFreeObject(obj);
203 valuePush(ctxt, xmlXPathNewNodeSet(NULL));
204 return;
205 }
206 cur = xmlXPtrGetNthChild(oldset->nodeTab[0], indx);
207 if (cur == NULL) {
208 xmlXPathFreeObject(obj);
209 valuePush(ctxt, xmlXPathNewNodeSet(NULL));
210 return;
211 }
212 oldset->nodeTab[0] = cur;
213 valuePush(ctxt, obj);
214 }
215
/**
 * xmlXPtrEvalXPtrPart:
 * @ctxt:  the XPointer Parser context
 * @name:  the preparsed Scheme for the XPtrPart, or NULL to parse it here.
 *         The string is freed by this function on every path that holds it.
 *
 * XPtrPart ::= 'xpointer' '(' XPtrExpr ')'
 *            | Scheme '(' SchemeSpecificExpr ')'
 *
 * Scheme   ::=  NCName - 'xpointer' [VC: Non-XPointer schemes]
 *
 * SchemeSpecificExpr ::= StringWithBalancedParens
 *
 * StringWithBalancedParens ::=
 *              [^()]* ('(' StringWithBalancedParens ')' [^()]*)*
 *              [VC: Parenthesis escaping]
 *
 * XPtrExpr ::= Expr [VC: Parenthesis escaping]
 *
 * VC: Parenthesis escaping:
 *   The end of an XPointer part is signaled by the right parenthesis ")"
 *   character that is balanced with the left parenthesis "(" character
 *   that began the part. Any unbalanced parenthesis character inside the
 *   expression, even within literals, must be escaped with a circumflex (^)
 *   character preceding it. If the expression contains any literal
 *   occurrences of the circumflex, each must be escaped with an additional
 *   circumflex (that is, ^^). If the unescaped parentheses in the expression
 *   are not balanced, a syntax error results.
 *
 * Parse and evaluate an XPtrPart. Basically it generates the unescaped
 * string and if the scheme is 'xpointer' it will call the XPath interpreter.
 *
 * Schemes handled here: "xpointer" and "xpath1" (XPath evaluation),
 * "element" (child sequence), and, when XPTR_XMLNS_SCHEME is defined,
 * "xmlns" (namespace registration). Any other scheme raises
 * XML_XPTR_UNKNOWN_SCHEME.
 *
 * While a scheme body is evaluated, @ctxt->base and @ctxt->cur temporarily
 * point into the unescaped copy; both are restored before returning.
 *
 * TODO: there is no new scheme registration mechanism
 */

static void
xmlXPtrEvalXPtrPart(xmlXPathParserContextPtr ctxt, xmlChar *name) {
    xmlChar *buffer, *cur;
    int len;
    int level;

    if (name == NULL)
        name = xmlXPathParseName(ctxt);
    if (name == NULL)
        XP_ERROR(XPATH_EXPR_ERROR);

    /* The scheme name must be followed by the opening parenthesis */
    if (CUR != '(') {
        xmlFree(name);
        XP_ERROR(XPATH_EXPR_ERROR);
    }
    NEXT;
    level = 1;

    /*
     * The unescaped copy can never be longer than the remaining input:
     * the loop below writes at most one byte per byte consumed.
     * One extra byte is reserved for the terminating 0.
     */
    len = xmlStrlen(ctxt->cur);
    len++;
    buffer = xmlMalloc(len);
    if (buffer == NULL) {
        xmlXPathPErrMemory(ctxt);
        xmlFree(name);
        return;
    }

    /* Copy the scheme data, tracking nesting and removing '^' escapes */
    cur = buffer;
    while (CUR != 0) {
        if (CUR == ')') {
            level--;
            if (level == 0) {
                /* balanced closing parenthesis: consume it and stop */
                NEXT;
                break;
            }
        } else if (CUR == '(') {
            level++;
        } else if (CUR == '^') {
            /* skip the circumflex; the escaped character is copied below */
            if ((NXT(1) == ')') || (NXT(1) == '(') || (NXT(1) == '^')) {
                NEXT;
            }
        }
        *cur++ = CUR;
        NEXT;
    }
    *cur = 0;

    /* Input ended before the part was closed */
    if ((level != 0) && (CUR == 0)) {
        xmlFree(name);
        xmlFree(buffer);
        XP_ERROR(XPTR_SYNTAX_ERROR);
    }

    if (xmlStrEqual(name, (xmlChar *) "xpointer") ||
        xmlStrEqual(name, (xmlChar *) "xpath1")) {
        const xmlChar *oldBase = ctxt->base;
        const xmlChar *oldCur = ctxt->cur;

        /* evaluate the unescaped copy instead of the original input */
        ctxt->cur = ctxt->base = buffer;
        /*
         * To evaluate an xpointer scheme element (4.3) we need:
         *   context initialized to the root
         *   context position initialized to 1
         *   context size initialized to 1
         */
        ctxt->context->node = (xmlNodePtr)ctxt->context->doc;
        ctxt->context->proximityPosition = 1;
        ctxt->context->contextSize = 1;
        xmlXPathEvalExpr(ctxt);
        ctxt->base = oldBase;
        ctxt->cur = oldCur;
    } else if (xmlStrEqual(name, (xmlChar *) "element")) {
        const xmlChar *oldBase = ctxt->base;
        const xmlChar *oldCur = ctxt->cur;
        xmlChar *name2;

        ctxt->cur = ctxt->base = buffer;
        if (buffer[0] == '/') {
            /* pure child sequence, starting from the root */
            xmlXPathRoot(ctxt);
            xmlXPtrEvalChildSeq(ctxt, NULL);
        } else {
            /* leading name, optionally followed by a child sequence */
            name2 = xmlXPathParseName(ctxt);
            if (name2 == NULL) {
                /* restore the parse position before buffer is released */
                ctxt->base = oldBase;
                ctxt->cur = oldCur;
                xmlFree(buffer);
                xmlFree(name);
                XP_ERROR(XPATH_EXPR_ERROR);
            }
            /* name2 is released by xmlXPtrEvalChildSeq */
            xmlXPtrEvalChildSeq(ctxt, name2);
        }
        ctxt->base = oldBase;
        ctxt->cur = oldCur;
#ifdef XPTR_XMLNS_SCHEME
    } else if (xmlStrEqual(name, (xmlChar *) "xmlns")) {
        const xmlChar *oldBase = ctxt->base;
        const xmlChar *oldCur = ctxt->cur;
        xmlChar *prefix;

        /* expected scheme data: prefix, optional blanks, '=', blanks, URI */
        ctxt->cur = ctxt->base = buffer;
        prefix = xmlXPathParseNCName(ctxt);
        if (prefix == NULL) {
            ctxt->base = oldBase;
            ctxt->cur = oldCur;
            xmlFree(buffer);
            xmlFree(name);
            XP_ERROR(XPTR_SYNTAX_ERROR);
        }
        SKIP_BLANKS;
        if (CUR != '=') {
            ctxt->base = oldBase;
            ctxt->cur = oldCur;
            xmlFree(prefix);
            xmlFree(buffer);
            xmlFree(name);
            XP_ERROR(XPTR_SYNTAX_ERROR);
        }
        NEXT;
        SKIP_BLANKS;

        /* everything left in the buffer is registered as the namespace */
        if (xmlXPathRegisterNs(ctxt->context, prefix, ctxt->cur) < 0)
            xmlXPathPErrMemory(ctxt);
        ctxt->base = oldBase;
        ctxt->cur = oldCur;
        xmlFree(prefix);
#endif /* XPTR_XMLNS_SCHEME */
    } else {
        xmlXPtrErr(ctxt, XML_XPTR_UNKNOWN_SCHEME,
                   "unsupported scheme '%s'\n", name);
    }
    xmlFree(buffer);
    xmlFree(name);
}
383
384 /**
385 * xmlXPtrEvalFullXPtr:
386 * @ctxt: the XPointer Parser context
387 * @name: the preparsed Scheme for the first XPtrPart
388 *
389 * FullXPtr ::= XPtrPart (S? XPtrPart)*
390 *
391 * As the specs says:
392 * -----------
393 * When multiple XPtrParts are provided, they must be evaluated in
394 * left-to-right order. If evaluation of one part fails, the nexti
395 * is evaluated. The following conditions cause XPointer part failure:
396 *
397 * - An unknown scheme
398 * - A scheme that does not locate any sub-resource present in the resource
399 * - A scheme that is not applicable to the media type of the resource
400 *
401 * The XPointer application must consume a failed XPointer part and
402 * attempt to evaluate the next one, if any. The result of the first
403 * XPointer part whose evaluation succeeds is taken to be the fragment
404 * located by the XPointer as a whole. If all the parts fail, the result
405 * for the XPointer as a whole is a sub-resource error.
406 * -----------
407 *
408 * Parse and evaluate a Full XPtr i.e. possibly a cascade of XPath based
409 * expressions or other schemes.
410 */
411 static void
xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt,xmlChar * name)412 xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt, xmlChar *name) {
413 if (name == NULL)
414 name = xmlXPathParseName(ctxt);
415 if (name == NULL)
416 XP_ERROR(XPATH_EXPR_ERROR);
417 while (name != NULL) {
418 ctxt->error = XPATH_EXPRESSION_OK;
419 xmlXPtrEvalXPtrPart(ctxt, name);
420
421 /* in case of syntax error, break here */
422 if ((ctxt->error != XPATH_EXPRESSION_OK) &&
423 (ctxt->error != XML_XPTR_UNKNOWN_SCHEME))
424 return;
425
426 /*
427 * If the returned value is a non-empty nodeset
428 * or location set, return here.
429 */
430 if (ctxt->value != NULL) {
431 xmlXPathObjectPtr obj = ctxt->value;
432
433 switch (obj->type) {
434 case XPATH_NODESET: {
435 xmlNodeSetPtr loc = ctxt->value->nodesetval;
436 if ((loc != NULL) && (loc->nodeNr > 0))
437 return;
438 break;
439 }
440 default:
441 break;
442 }
443
444 /*
445 * Evaluating to improper values is equivalent to
446 * a sub-resource error, clean-up the stack
447 */
448 do {
449 obj = valuePop(ctxt);
450 if (obj != NULL) {
451 xmlXPathFreeObject(obj);
452 }
453 } while (obj != NULL);
454 }
455
456 /*
457 * Is there another XPointer part.
458 */
459 SKIP_BLANKS;
460 name = xmlXPathParseName(ctxt);
461 }
462 }
463
464 /**
465 * xmlXPtrEvalChildSeq:
466 * @ctxt: the XPointer Parser context
467 * @name: a possible ID name of the child sequence
468 *
469 * ChildSeq ::= '/1' ('/' [0-9]*)*
470 * | Name ('/' [0-9]*)+
471 *
472 * Parse and evaluate a Child Sequence. This routine also handle the
473 * case of a Bare Name used to get a document ID.
474 */
475 static void
xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt,xmlChar * name)476 xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name) {
477 /*
478 * XPointer don't allow by syntax to address in multirooted trees
479 * this might prove useful in some cases, warn about it.
480 */
481 if ((name == NULL) && (CUR == '/') && (NXT(1) != '1')) {
482 xmlXPtrErr(ctxt, XML_XPTR_CHILDSEQ_START,
483 "warning: ChildSeq not starting by /1\n", NULL);
484 }
485
486 if (name != NULL) {
487 valuePush(ctxt, xmlXPathNewString(name));
488 xmlFree(name);
489 xmlXPathIdFunction(ctxt, 1);
490 CHECK_ERROR;
491 }
492
493 while (CUR == '/') {
494 int child = 0, overflow = 0;
495 NEXT;
496
497 while ((CUR >= '0') && (CUR <= '9')) {
498 int d = CUR - '0';
499 if (child > INT_MAX / 10)
500 overflow = 1;
501 else
502 child *= 10;
503 if (child > INT_MAX - d)
504 overflow = 1;
505 else
506 child += d;
507 NEXT;
508 }
509 if (overflow)
510 child = 0;
511 xmlXPtrGetChildNo(ctxt, child);
512 }
513 }
514
515
516 /**
517 * xmlXPtrEvalXPointer:
518 * @ctxt: the XPointer Parser context
519 *
520 * XPointer ::= Name
521 * | ChildSeq
522 * | FullXPtr
523 *
524 * Parse and evaluate an XPointer
525 */
526 static void
xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt)527 xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt) {
528 if (ctxt->valueTab == NULL) {
529 /* Allocate the value stack */
530 ctxt->valueTab = (xmlXPathObjectPtr *)
531 xmlMalloc(10 * sizeof(xmlXPathObjectPtr));
532 if (ctxt->valueTab == NULL) {
533 xmlXPathPErrMemory(ctxt);
534 return;
535 }
536 ctxt->valueNr = 0;
537 ctxt->valueMax = 10;
538 ctxt->value = NULL;
539 }
540 SKIP_BLANKS;
541 if (CUR == '/') {
542 xmlXPathRoot(ctxt);
543 xmlXPtrEvalChildSeq(ctxt, NULL);
544 } else {
545 xmlChar *name;
546
547 name = xmlXPathParseName(ctxt);
548 if (name == NULL)
549 XP_ERROR(XPATH_EXPR_ERROR);
550 if (CUR == '(') {
551 xmlXPtrEvalFullXPtr(ctxt, name);
552 /* Short evaluation */
553 return;
554 } else {
555 /* this handle both Bare Names and Child Sequences */
556 xmlXPtrEvalChildSeq(ctxt, name);
557 }
558 }
559 SKIP_BLANKS;
560 if (CUR != 0)
561 XP_ERROR(XPATH_EXPR_ERROR);
562 }
563
564
565 /************************************************************************
566 * *
567 * General routines *
568 * *
569 ************************************************************************/
570
571 /**
572 * xmlXPtrNewContext:
573 * @doc: the XML document
574 * @here: the node that directly contains the XPointer being evaluated or NULL
575 * @origin: the element from which a user or program initiated traversal of
576 * the link, or NULL.
577 *
578 * Create a new XPointer context
579 *
580 * Returns the xmlXPathContext just allocated.
581 */
582 xmlXPathContextPtr
xmlXPtrNewContext(xmlDocPtr doc,xmlNodePtr here,xmlNodePtr origin)583 xmlXPtrNewContext(xmlDocPtr doc, xmlNodePtr here, xmlNodePtr origin) {
584 xmlXPathContextPtr ret;
585 (void) here;
586 (void) origin;
587
588 ret = xmlXPathNewContext(doc);
589 if (ret == NULL)
590 return(ret);
591
592 return(ret);
593 }
594
595 /**
596 * xmlXPtrEval:
597 * @str: the XPointer expression
598 * @ctx: the XPointer context
599 *
600 * Evaluate the XPath Location Path in the given context.
601 *
602 * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL.
603 * the caller has to free the object.
604 */
605 xmlXPathObjectPtr
xmlXPtrEval(const xmlChar * str,xmlXPathContextPtr ctx)606 xmlXPtrEval(const xmlChar *str, xmlXPathContextPtr ctx) {
607 xmlXPathParserContextPtr ctxt;
608 xmlXPathObjectPtr res = NULL, tmp;
609 xmlXPathObjectPtr init = NULL;
610 int stack = 0;
611
612 xmlInitParser();
613
614 if ((ctx == NULL) || (str == NULL))
615 return(NULL);
616
617 xmlResetError(&ctx->lastError);
618
619 ctxt = xmlXPathNewParserContext(str, ctx);
620 if (ctxt == NULL) {
621 xmlXPathErrMemory(ctx);
622 return(NULL);
623 }
624 xmlXPtrEvalXPointer(ctxt);
625 if (ctx->lastError.code != XML_ERR_OK)
626 goto error;
627
628 if ((ctxt->value != NULL) &&
629 (ctxt->value->type != XPATH_NODESET)) {
630 xmlXPtrErr(ctxt, XML_XPTR_EVAL_FAILED,
631 "xmlXPtrEval: evaluation failed to return a node set\n",
632 NULL);
633 } else {
634 res = valuePop(ctxt);
635 }
636
637 do {
638 tmp = valuePop(ctxt);
639 if (tmp != NULL) {
640 if (tmp != init) {
641 if (tmp->type == XPATH_NODESET) {
642 /*
643 * Evaluation may push a root nodeset which is unused
644 */
645 xmlNodeSetPtr set;
646 set = tmp->nodesetval;
647 if ((set == NULL) || (set->nodeNr != 1) ||
648 (set->nodeTab[0] != (xmlNodePtr) ctx->doc))
649 stack++;
650 } else
651 stack++;
652 }
653 xmlXPathFreeObject(tmp);
654 }
655 } while (tmp != NULL);
656 if (stack != 0) {
657 xmlXPtrErr(ctxt, XML_XPTR_EXTRA_OBJECTS,
658 "xmlXPtrEval: object(s) left on the eval stack\n",
659 NULL);
660 }
661 if (ctx->lastError.code != XML_ERR_OK) {
662 xmlXPathFreeObject(res);
663 res = NULL;
664 }
665
666 error:
667 xmlXPathFreeParserContext(ctxt);
668 return(res);
669 }
670
671 #endif
672
673