1 /**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * [email protected]
9 */
10
11 #define IN_LIBXML
12 #include "libxml.h"
13
14 #include <limits.h>
15 #include <string.h>
16
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/xmlerror.h>
20
21 #include "private/error.h"
22
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar of the RFCs referenced above has no size limit.
 * In practice URIs are relatively short, except for the data URI scheme
 * defined in RFC 2397. Even for data URIs the usual maximum size before
 * hitting random practical limits is around 64 KB, and 4 KB is usually
 * the largest size admitted for proper operation.
 * The value below is more a security limit than anything else and
 * should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The expansion is parenthesized so the macro behaves as a single value
 * inside larger expressions.
 */
#define MAX_URI_LENGTH (1024 * 1024)

/* Port value of a freshly created URI, i.e. no port present. */
#define PORT_EMPTY 0
/* Special port value used to mark an authority with an empty server. */
#define PORT_EMPTY_SERVER (-1)
39
40 static void xmlCleanURI(xmlURIPtr uri);
41
/*
 * Old character classes from RFC 2396, used in legacy handling code.
 * Each macro takes a character value and evaluates it more than once,
 * so the argument must not have side effects.
 */

/*
 * lowalpha = "a" | "b" | ... | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | ... | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | ... | "9"
 *
 * Any definition of IS_DIGIT made earlier is dropped first.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
75
/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 *
 * Takes a character value.
 */
#define IS_MARK(x)                                                   \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') || \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') ||                \
     ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other legacy classes this macro takes a pointer to the
 * character, not the character itself.
 */
#define IS_UNWISE(p)                                       \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||    \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||   \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 *
 * Takes a character value.
 */
#define IS_RESERVED(x)                                               \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') || \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') || \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 *
 * Takes a character value.
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
105
/*
 * Skip to the next character, handling escaped sequences: the pointer
 * advances by three bytes when it points at '%', by one byte otherwise.
 *
 * @p must be a modifiable pointer lvalue; it is evaluated more than once.
 * The macro itself does not check that two more bytes follow a '%', so it
 * must only be applied to input that was already validated.
 * The argument is parenthesized so that expressions such as *pp advance
 * the intended pointer.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 *    path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first @n bytes of @s through xmlStrndup() and hand the
 * result back as a char pointer.
 */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
121
122 /************************************************************************
123 * *
124 * RFC 3986 parser *
125 * *
126 ************************************************************************/
127
/*
 * RFC 3986 character classes.
 *
 * Every ISA_* predicate takes a pointer to the character under test and
 * evaluates it several times, so the argument must not have side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||    \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                        \
    (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||     \
     ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                     \
    (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
     ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
     ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
     ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                     \
    (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
     ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
     ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_STRICTLY_UNRESERVED(p)                           \
    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||  \
     ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The second and third bytes are only read once the preceding byte
 * matched, so the test never reads past a NUL terminator.
 * The argument is parenthesized before the offset is added so that
 * compound pointer expressions are handled correctly.
 */
#define ISA_PCT_ENCODED(p)                                   \
    ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 *
 * @u is the URI whose flags are handed to ISA_UNRESERVED.
 */
#define ISA_PCHAR(u, p)                                      \
    (ISA_UNRESERVED(u, p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
     ((*(p) == ':')) || ((*(p) == '@')))
177
/*
 * From https://www.w3.org/TR/leiri/
 *
 * " " / "<" / ">" / '"' / "{" / "}" / "|"
 * / "\" / "^" / "`" / %x0-1F / %x7F-D7FF
 * / %xE000-FFFD / %x10000-10FFFF
 *
 * Takes a pointer to the byte under test. The NUL byte is deliberately
 * excluded although it falls into %x0-1F: the parser works on
 * NUL-terminated strings, and accepting the terminator as a valid
 * character would let the scanning loops run past the end of the input.
 * Bytes at or above 0x7F match on either signedness of char, through
 * the ">= 0x7F" test or, when negative, through the "<= 0x20" test.
 */
#define ISA_UCSCHAR(p)                                                      \
    ((*(p) != 0) &&                                                         \
     ((*(p) <= 0x20) || (*(p) >= 0x7F) || (*(p) == '<') || (*(p) == '>') || \
      (*(p) == '"') || (*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||   \
      (*(p) == '\\') || (*(p) == '^') || (*(p) == '`')))

/* unreserved test that honours the flags stored in the URI @u */
#define ISA_UNRESERVED(u, p) (xmlIsUnreserved(u, p))

/* Bit flags tested against the cleanup field of an xmlURI. */
#define XML_URI_ALLOW_UNWISE 1    /* also accept the IS_UNWISE characters */
#define XML_URI_NO_UNESCAPE 2     /* store components without unescaping */
#define XML_URI_ALLOW_UCSCHAR 4   /* also accept the ISA_UCSCHAR characters */
195
196 static int
xmlIsUnreserved(xmlURIPtr uri,const char * cur)197 xmlIsUnreserved(xmlURIPtr uri, const char *cur) {
198 if (uri == NULL)
199 return(0);
200
201 if (ISA_STRICTLY_UNRESERVED(cur))
202 return(1);
203
204 if (uri->cleanup & XML_URI_ALLOW_UNWISE) {
205 if (IS_UNWISE(cur))
206 return(1);
207 } else if (uri->cleanup & XML_URI_ALLOW_UCSCHAR) {
208 if (ISA_UCSCHAR(cur))
209 return(1);
210 }
211
212 return(0);
213 }
214
215 /**
216 * xmlParse3986Scheme:
217 * @uri: pointer to an URI structure
218 * @str: pointer to the string to analyze
219 *
220 * Parse an URI scheme
221 *
222 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223 *
224 * Returns 0 or the error code
225 */
226 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)227 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
228 const char *cur;
229
230 cur = *str;
231 if (!ISA_ALPHA(cur))
232 return(1);
233 cur++;
234 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
235 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
236 if (uri != NULL) {
237 if (uri->scheme != NULL) xmlFree(uri->scheme);
238 uri->scheme = STRNDUP(*str, cur - *str);
239 if (uri->scheme == NULL)
240 return(-1);
241 }
242 *str = cur;
243 return(0);
244 }
245
246 /**
247 * xmlParse3986Fragment:
248 * @uri: pointer to an URI structure
249 * @str: pointer to the string to analyze
250 *
251 * Parse the query part of an URI
252 *
253 * fragment = *( pchar / "/" / "?" )
254 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
255 * in the fragment identifier but this is used very broadly for
256 * xpointer scheme selection, so we are allowing it here to not break
257 * for example all the DocBook processing chains.
258 *
259 * Returns 0 or the error code
260 */
261 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)262 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
263 {
264 const char *cur;
265
266 cur = *str;
267
268 while ((ISA_PCHAR(uri, cur)) || (*cur == '/') || (*cur == '?') ||
269 (*cur == '[') || (*cur == ']'))
270 NEXT(cur);
271 if (uri != NULL) {
272 if (uri->fragment != NULL)
273 xmlFree(uri->fragment);
274 if (uri->cleanup & XML_URI_NO_UNESCAPE)
275 uri->fragment = STRNDUP(*str, cur - *str);
276 else
277 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
278 if (uri->fragment == NULL)
279 return (-1);
280 }
281 *str = cur;
282 return (0);
283 }
284
285 /**
286 * xmlParse3986Query:
287 * @uri: pointer to an URI structure
288 * @str: pointer to the string to analyze
289 *
290 * Parse the query part of an URI
291 *
292 * query = *uric
293 *
294 * Returns 0 or the error code
295 */
296 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)297 xmlParse3986Query(xmlURIPtr uri, const char **str)
298 {
299 const char *cur;
300
301 cur = *str;
302
303 while ((ISA_PCHAR(uri, cur)) || (*cur == '/') || (*cur == '?'))
304 NEXT(cur);
305 if (uri != NULL) {
306 if (uri->query != NULL)
307 xmlFree(uri->query);
308 if (uri->cleanup & XML_URI_NO_UNESCAPE)
309 uri->query = STRNDUP(*str, cur - *str);
310 else
311 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
312 if (uri->query == NULL)
313 return (-1);
314
315 /* Save the raw bytes of the query as well.
316 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
317 */
318 if (uri->query_raw != NULL)
319 xmlFree (uri->query_raw);
320 uri->query_raw = STRNDUP (*str, cur - *str);
321 if (uri->query_raw == NULL)
322 return (-1);
323 }
324 *str = cur;
325 return (0);
326 }
327
328 /**
329 * xmlParse3986Port:
330 * @uri: pointer to an URI structure
331 * @str: the string to analyze
332 *
333 * Parse a port part and fills in the appropriate fields
334 * of the @uri structure
335 *
336 * port = *DIGIT
337 *
338 * Returns 0 or the error code
339 */
340 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)341 xmlParse3986Port(xmlURIPtr uri, const char **str)
342 {
343 const char *cur = *str;
344 int port = 0;
345
346 if (ISA_DIGIT(cur)) {
347 while (ISA_DIGIT(cur)) {
348 int digit = *cur - '0';
349
350 if (port > INT_MAX / 10)
351 return(1);
352 port *= 10;
353 if (port > INT_MAX - digit)
354 return(1);
355 port += digit;
356
357 cur++;
358 }
359 if (uri != NULL)
360 uri->port = port;
361 *str = cur;
362 return(0);
363 }
364 return(1);
365 }
366
367 /**
368 * xmlParse3986Userinfo:
369 * @uri: pointer to an URI structure
370 * @str: the string to analyze
371 *
372 * Parse an user information part and fills in the appropriate fields
373 * of the @uri structure
374 *
375 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
376 *
377 * Returns 0 or the error code
378 */
379 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)380 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
381 {
382 const char *cur;
383
384 cur = *str;
385 while (ISA_UNRESERVED(uri, cur) || ISA_PCT_ENCODED(cur) ||
386 ISA_SUB_DELIM(cur) || (*cur == ':'))
387 NEXT(cur);
388 if (*cur == '@') {
389 if (uri != NULL) {
390 if (uri->user != NULL) xmlFree(uri->user);
391 if (uri->cleanup & XML_URI_NO_UNESCAPE)
392 uri->user = STRNDUP(*str, cur - *str);
393 else
394 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
395 if (uri->user == NULL)
396 return(-1);
397 }
398 *str = cur;
399 return(0);
400 }
401 return(1);
402 }
403
/**
 * xmlParse3986DecOctet:
 * @str:  in/out pointer to the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. When three digits are present the value must not
 * exceed 255; a leading zero is only valid for the single digit "0".
 * @str is only updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[1] is only read after cur[0] proved to be a digit, and cur[2]
     * only after cur[1] did, so the NUL terminator is never passed.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;                                /* 0-9 */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        if (cur[0] == '0')
            return(1);
        len = 2;                                /* 10-99 */
    } else if (cur[0] == '1') {
        len = 3;                                /* 100-199 */
    } else if ((cur[0] == '2') &&
               ((cur[1] <= '4') ||
                ((cur[1] == '5') && (cur[2] <= '5')))) {
        len = 3;                                /* 200-255 */
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
441 /**
442 * xmlParse3986Host:
443 * @uri: pointer to an URI structure
444 * @str: the string to analyze
445 *
446 * Parse an host part and fills in the appropriate fields
447 * of the @uri structure
448 *
449 * host = IP-literal / IPv4address / reg-name
450 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
451 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
452 * reg-name = *( unreserved / pct-encoded / sub-delims )
453 *
454 * Returns 0 or the error code
455 */
456 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)457 xmlParse3986Host(xmlURIPtr uri, const char **str)
458 {
459 const char *cur = *str;
460 const char *host;
461
462 host = cur;
463 /*
464 * IPv6 and future addressing scheme are enclosed between brackets
465 */
466 if (*cur == '[') {
467 cur++;
468 while ((*cur != ']') && (*cur != 0))
469 cur++;
470 if (*cur != ']')
471 return(1);
472 cur++;
473 goto found;
474 }
475 /*
476 * try to parse an IPv4
477 */
478 if (ISA_DIGIT(cur)) {
479 if (xmlParse3986DecOctet(&cur) != 0)
480 goto not_ipv4;
481 if (*cur != '.')
482 goto not_ipv4;
483 cur++;
484 if (xmlParse3986DecOctet(&cur) != 0)
485 goto not_ipv4;
486 if (*cur != '.')
487 goto not_ipv4;
488 if (xmlParse3986DecOctet(&cur) != 0)
489 goto not_ipv4;
490 if (*cur != '.')
491 goto not_ipv4;
492 if (xmlParse3986DecOctet(&cur) != 0)
493 goto not_ipv4;
494 goto found;
495 not_ipv4:
496 cur = *str;
497 }
498 /*
499 * then this should be a hostname which can be empty
500 */
501 while (ISA_UNRESERVED(uri, cur) ||
502 ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
503 NEXT(cur);
504 found:
505 if (uri != NULL) {
506 if (uri->authority != NULL) xmlFree(uri->authority);
507 uri->authority = NULL;
508 if (uri->server != NULL) xmlFree(uri->server);
509 if (cur != host) {
510 if (uri->cleanup & XML_URI_NO_UNESCAPE)
511 uri->server = STRNDUP(host, cur - host);
512 else
513 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
514 if (uri->server == NULL)
515 return(-1);
516 } else
517 uri->server = NULL;
518 }
519 *str = cur;
520 return(0);
521 }
522
523 /**
524 * xmlParse3986Authority:
525 * @uri: pointer to an URI structure
526 * @str: the string to analyze
527 *
528 * Parse an authority part and fills in the appropriate fields
529 * of the @uri structure
530 *
531 * authority = [ userinfo "@" ] host [ ":" port ]
532 *
533 * Returns 0 or the error code
534 */
535 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)536 xmlParse3986Authority(xmlURIPtr uri, const char **str)
537 {
538 const char *cur;
539 int ret;
540
541 cur = *str;
542 /*
543 * try to parse an userinfo and check for the trailing @
544 */
545 ret = xmlParse3986Userinfo(uri, &cur);
546 if (ret < 0)
547 return(ret);
548 if ((ret != 0) || (*cur != '@'))
549 cur = *str;
550 else
551 cur++;
552 ret = xmlParse3986Host(uri, &cur);
553 if (ret != 0) return(ret);
554 if (*cur == ':') {
555 cur++;
556 ret = xmlParse3986Port(uri, &cur);
557 if (ret != 0) return(ret);
558 }
559 *str = cur;
560 return(0);
561 }
562
563 /**
564 * xmlParse3986Segment:
565 * @str: the string to analyze
566 * @forbid: an optional forbidden character
567 * @empty: allow an empty segment
568 *
569 * Parse a segment and fills in the appropriate fields
570 * of the @uri structure
571 *
572 * segment = *pchar
573 * segment-nz = 1*pchar
574 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
575 * ; non-zero-length segment without any colon ":"
576 *
577 * Returns 0 or the error code
578 */
579 static int
xmlParse3986Segment(xmlURIPtr uri,const char ** str,char forbid,int empty)580 xmlParse3986Segment(xmlURIPtr uri, const char **str, char forbid, int empty)
581 {
582 const char *cur;
583
584 cur = *str;
585 if (!ISA_PCHAR(uri, cur)) {
586 if (empty)
587 return(0);
588 return(1);
589 }
590 while (ISA_PCHAR(uri, cur) && (*cur != forbid))
591 NEXT(cur);
592 *str = cur;
593 return (0);
594 }
595
596 /**
597 * xmlParse3986PathAbEmpty:
598 * @uri: pointer to an URI structure
599 * @str: the string to analyze
600 *
601 * Parse an path absolute or empty and fills in the appropriate fields
602 * of the @uri structure
603 *
604 * path-abempty = *( "/" segment )
605 *
606 * Returns 0 or the error code
607 */
608 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)609 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
610 {
611 const char *cur;
612 int ret;
613
614 cur = *str;
615
616 while (*cur == '/') {
617 cur++;
618 ret = xmlParse3986Segment(uri, &cur, 0, 1);
619 if (ret != 0) return(ret);
620 }
621 if (uri != NULL) {
622 if (uri->path != NULL) xmlFree(uri->path);
623 if (*str != cur) {
624 if (uri->cleanup & XML_URI_NO_UNESCAPE)
625 uri->path = STRNDUP(*str, cur - *str);
626 else
627 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
628 if (uri->path == NULL)
629 return (-1);
630 } else {
631 uri->path = NULL;
632 }
633 }
634 *str = cur;
635 return (0);
636 }
637
638 /**
639 * xmlParse3986PathAbsolute:
640 * @uri: pointer to an URI structure
641 * @str: the string to analyze
642 *
643 * Parse an path absolute and fills in the appropriate fields
644 * of the @uri structure
645 *
646 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
647 *
648 * Returns 0 or the error code
649 */
650 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)651 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
652 {
653 const char *cur;
654 int ret;
655
656 cur = *str;
657
658 if (*cur != '/')
659 return(1);
660 cur++;
661 ret = xmlParse3986Segment(uri, &cur, 0, 0);
662 if (ret == 0) {
663 while (*cur == '/') {
664 cur++;
665 ret = xmlParse3986Segment(uri, &cur, 0, 1);
666 if (ret != 0) return(ret);
667 }
668 }
669 if (uri != NULL) {
670 if (uri->path != NULL) xmlFree(uri->path);
671 if (cur != *str) {
672 if (uri->cleanup & XML_URI_NO_UNESCAPE)
673 uri->path = STRNDUP(*str, cur - *str);
674 else
675 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
676 if (uri->path == NULL)
677 return (-1);
678 } else {
679 uri->path = NULL;
680 }
681 }
682 *str = cur;
683 return (0);
684 }
685
686 /**
687 * xmlParse3986PathRootless:
688 * @uri: pointer to an URI structure
689 * @str: the string to analyze
690 *
691 * Parse an path without root and fills in the appropriate fields
692 * of the @uri structure
693 *
694 * path-rootless = segment-nz *( "/" segment )
695 *
696 * Returns 0 or the error code
697 */
698 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)699 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
700 {
701 const char *cur;
702 int ret;
703
704 cur = *str;
705
706 ret = xmlParse3986Segment(uri, &cur, 0, 0);
707 if (ret != 0) return(ret);
708 while (*cur == '/') {
709 cur++;
710 ret = xmlParse3986Segment(uri, &cur, 0, 1);
711 if (ret != 0) return(ret);
712 }
713 if (uri != NULL) {
714 if (uri->path != NULL) xmlFree(uri->path);
715 if (cur != *str) {
716 if (uri->cleanup & XML_URI_NO_UNESCAPE)
717 uri->path = STRNDUP(*str, cur - *str);
718 else
719 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
720 if (uri->path == NULL)
721 return (-1);
722 } else {
723 uri->path = NULL;
724 }
725 }
726 *str = cur;
727 return (0);
728 }
729
730 /**
731 * xmlParse3986PathNoScheme:
732 * @uri: pointer to an URI structure
733 * @str: the string to analyze
734 *
735 * Parse an path which is not a scheme and fills in the appropriate fields
736 * of the @uri structure
737 *
738 * path-noscheme = segment-nz-nc *( "/" segment )
739 *
740 * Returns 0 or the error code
741 */
742 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)743 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
744 {
745 const char *cur;
746 int ret;
747
748 cur = *str;
749
750 ret = xmlParse3986Segment(uri, &cur, ':', 0);
751 if (ret != 0) return(ret);
752 while (*cur == '/') {
753 cur++;
754 ret = xmlParse3986Segment(uri, &cur, 0, 1);
755 if (ret != 0) return(ret);
756 }
757 if (uri != NULL) {
758 if (uri->path != NULL) xmlFree(uri->path);
759 if (cur != *str) {
760 if (uri->cleanup & XML_URI_NO_UNESCAPE)
761 uri->path = STRNDUP(*str, cur - *str);
762 else
763 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
764 if (uri->path == NULL)
765 return (-1);
766 } else {
767 uri->path = NULL;
768 }
769 }
770 *str = cur;
771 return (0);
772 }
773
774 /**
775 * xmlParse3986HierPart:
776 * @uri: pointer to an URI structure
777 * @str: the string to analyze
778 *
779 * Parse an hierarchical part and fills in the appropriate fields
780 * of the @uri structure
781 *
782 * hier-part = "//" authority path-abempty
783 * / path-absolute
784 * / path-rootless
785 * / path-empty
786 *
787 * Returns 0 or the error code
788 */
789 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)790 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
791 {
792 const char *cur;
793 int ret;
794
795 cur = *str;
796
797 if ((*cur == '/') && (*(cur + 1) == '/')) {
798 cur += 2;
799 ret = xmlParse3986Authority(uri, &cur);
800 if (ret != 0) return(ret);
801 /*
802 * An empty server is marked with a special URI value.
803 */
804 if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
805 uri->port = PORT_EMPTY_SERVER;
806 ret = xmlParse3986PathAbEmpty(uri, &cur);
807 if (ret != 0) return(ret);
808 *str = cur;
809 return(0);
810 } else if (*cur == '/') {
811 ret = xmlParse3986PathAbsolute(uri, &cur);
812 if (ret != 0) return(ret);
813 } else if (ISA_PCHAR(uri, cur)) {
814 ret = xmlParse3986PathRootless(uri, &cur);
815 if (ret != 0) return(ret);
816 } else {
817 /* path-empty is effectively empty */
818 if (uri != NULL) {
819 if (uri->path != NULL) xmlFree(uri->path);
820 uri->path = NULL;
821 }
822 }
823 *str = cur;
824 return (0);
825 }
826
827 /**
828 * xmlParse3986RelativeRef:
829 * @uri: pointer to an URI structure
830 * @str: the string to analyze
831 *
832 * Parse an URI string and fills in the appropriate fields
833 * of the @uri structure
834 *
835 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
836 * relative-part = "//" authority path-abempty
837 * / path-absolute
838 * / path-noscheme
839 * / path-empty
840 *
841 * Returns 0 or the error code
842 */
843 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)844 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
845 int ret;
846
847 if ((*str == '/') && (*(str + 1) == '/')) {
848 str += 2;
849 ret = xmlParse3986Authority(uri, &str);
850 if (ret != 0) return(ret);
851 ret = xmlParse3986PathAbEmpty(uri, &str);
852 if (ret != 0) return(ret);
853 } else if (*str == '/') {
854 ret = xmlParse3986PathAbsolute(uri, &str);
855 if (ret != 0) return(ret);
856 } else if (ISA_PCHAR(uri, str)) {
857 ret = xmlParse3986PathNoScheme(uri, &str);
858 if (ret != 0) return(ret);
859 } else {
860 /* path-empty is effectively empty */
861 if (uri != NULL) {
862 if (uri->path != NULL) xmlFree(uri->path);
863 uri->path = NULL;
864 }
865 }
866
867 if (*str == '?') {
868 str++;
869 ret = xmlParse3986Query(uri, &str);
870 if (ret != 0) return(ret);
871 }
872 if (*str == '#') {
873 str++;
874 ret = xmlParse3986Fragment(uri, &str);
875 if (ret != 0) return(ret);
876 }
877 if (*str != 0) {
878 xmlCleanURI(uri);
879 return(1);
880 }
881 return(0);
882 }
883
884
885 /**
886 * xmlParse3986URI:
887 * @uri: pointer to an URI structure
888 * @str: the string to analyze
889 *
890 * Parse an URI string and fills in the appropriate fields
891 * of the @uri structure
892 *
893 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
894 *
895 * Returns 0 or the error code
896 */
897 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)898 xmlParse3986URI(xmlURIPtr uri, const char *str) {
899 int ret;
900
901 ret = xmlParse3986Scheme(uri, &str);
902 if (ret != 0) return(ret);
903 if (*str != ':') {
904 return(1);
905 }
906 str++;
907 ret = xmlParse3986HierPart(uri, &str);
908 if (ret != 0) return(ret);
909 if (*str == '?') {
910 str++;
911 ret = xmlParse3986Query(uri, &str);
912 if (ret != 0) return(ret);
913 }
914 if (*str == '#') {
915 str++;
916 ret = xmlParse3986Fragment(uri, &str);
917 if (ret != 0) return(ret);
918 }
919 if (*str != 0) {
920 xmlCleanURI(uri);
921 return(1);
922 }
923 return(0);
924 }
925
926 /**
927 * xmlParse3986URIReference:
928 * @uri: pointer to an URI structure
929 * @str: the string to analyze
930 *
931 * Parse an URI reference string and fills in the appropriate fields
932 * of the @uri structure
933 *
934 * URI-reference = URI / relative-ref
935 *
936 * Returns 0 or the error code
937 */
938 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)939 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
940 int ret;
941
942 if (str == NULL)
943 return(-1);
944 xmlCleanURI(uri);
945
946 /*
947 * Try first to parse absolute refs, then fallback to relative if
948 * it fails.
949 */
950 ret = xmlParse3986URI(uri, str);
951 if (ret < 0)
952 return(ret);
953 if (ret != 0) {
954 xmlCleanURI(uri);
955 ret = xmlParse3986RelativeRef(uri, str);
956 if (ret != 0) {
957 xmlCleanURI(uri);
958 return(ret);
959 }
960 }
961 return(0);
962 }
963
964 /**
965 * xmlParseURISafe:
966 * @str: the URI string to analyze
967 * @uriOut: optional pointer to parsed URI
968 *
969 * Parse an URI based on RFC 3986
970 *
971 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
972 *
973 * Returns 0 on success, an error code (typically 1) if the URI is invalid
974 * or -1 if a memory allocation failed.
975 */
976 int
xmlParseURISafe(const char * str,xmlURIPtr * uriOut)977 xmlParseURISafe(const char *str, xmlURIPtr *uriOut) {
978 xmlURIPtr uri;
979 int ret;
980
981 if (uriOut != NULL)
982 *uriOut = NULL;
983 if (str == NULL)
984 return(1);
985
986 uri = xmlCreateURI();
987 if (uri == NULL)
988 return(-1);
989
990 ret = xmlParse3986URIReference(uri, str);
991 if (ret) {
992 xmlFreeURI(uri);
993 return(ret);
994 }
995
996 if (uriOut != NULL)
997 *uriOut = uri;
998 return(0);
999 }
1000
1001 /**
1002 * xmlParseURI:
1003 * @str: the URI string to analyze
1004 *
1005 * Parse an URI based on RFC 3986
1006 *
1007 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1008 *
1009 * Returns a newly built xmlURIPtr or NULL in case of error
1010 */
1011 xmlURIPtr
xmlParseURI(const char * str)1012 xmlParseURI(const char *str) {
1013 xmlURIPtr uri;
1014 xmlParseURISafe(str, &uri);
1015 return(uri);
1016 }
1017
1018 /**
1019 * xmlParseURIReference:
1020 * @uri: pointer to an URI structure
1021 * @str: the string to analyze
1022 *
1023 * Parse an URI reference string based on RFC 3986 and fills in the
1024 * appropriate fields of the @uri structure
1025 *
1026 * URI-reference = URI / relative-ref
1027 *
1028 * Returns 0 or the error code
1029 */
1030 int
xmlParseURIReference(xmlURIPtr uri,const char * str)1031 xmlParseURIReference(xmlURIPtr uri, const char *str) {
1032 return(xmlParse3986URIReference(uri, str));
1033 }
1034
1035 /**
1036 * xmlParseURIRaw:
1037 * @str: the URI string to analyze
1038 * @raw: if 1 unescaping of URI pieces are disabled
1039 *
1040 * Parse an URI but allows to keep intact the original fragments.
1041 *
1042 * URI-reference = URI / relative-ref
1043 *
1044 * Returns a newly built xmlURIPtr or NULL in case of error
1045 */
1046 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)1047 xmlParseURIRaw(const char *str, int raw) {
1048 xmlURIPtr uri;
1049 int ret;
1050
1051 if (str == NULL)
1052 return(NULL);
1053 uri = xmlCreateURI();
1054 if (uri != NULL) {
1055 if (raw) {
1056 uri->cleanup |= XML_URI_NO_UNESCAPE;
1057 }
1058 ret = xmlParseURIReference(uri, str);
1059 if (ret) {
1060 xmlFreeURI(uri);
1061 return(NULL);
1062 }
1063 }
1064 return(uri);
1065 }
1066
1067 /************************************************************************
1068 * *
1069 * Generic URI structure functions *
1070 * *
1071 ************************************************************************/
1072
1073 /**
1074 * xmlCreateURI:
1075 *
1076 * Simply creates an empty xmlURI
1077 *
1078 * Returns the new structure or NULL in case of error
1079 */
1080 xmlURIPtr
xmlCreateURI(void)1081 xmlCreateURI(void) {
1082 xmlURIPtr ret;
1083
1084 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1085 if (ret == NULL)
1086 return(NULL);
1087 memset(ret, 0, sizeof(xmlURI));
1088 ret->port = PORT_EMPTY;
1089 return(ret);
1090 }
1091
1092 /**
1093 * xmlSaveUriRealloc:
1094 *
1095 * Function to handle properly a reallocation when saving an URI
1096 * Also imposes some limit on the length of an URI string output
1097 */
1098 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1099 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1100 xmlChar *temp;
1101 int tmp;
1102
1103 if (*max > MAX_URI_LENGTH)
1104 return(NULL);
1105 tmp = *max * 2;
1106 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1107 if (temp == NULL)
1108 return(NULL);
1109 *max = tmp;
1110 return(temp);
1111 }
1112
1113 /**
1114 * xmlSaveUri:
1115 * @uri: pointer to an xmlURI
1116 *
1117 * Save the URI as an escaped string
1118 *
1119 * Returns a new string (to be deallocated by caller)
1120 */
1121 xmlChar *
xmlSaveUri(xmlURIPtr uri)1122 xmlSaveUri(xmlURIPtr uri) {
1123 xmlChar *ret = NULL;
1124 xmlChar *temp;
1125 const char *p;
1126 int len;
1127 int max;
1128
1129 if (uri == NULL) return(NULL);
1130
1131
1132 max = 80;
1133 ret = (xmlChar *) xmlMallocAtomic(max + 1);
1134 if (ret == NULL)
1135 return(NULL);
1136 len = 0;
1137
1138 if (uri->scheme != NULL) {
1139 p = uri->scheme;
1140 while (*p != 0) {
1141 if (len >= max) {
1142 temp = xmlSaveUriRealloc(ret, &max);
1143 if (temp == NULL) goto mem_error;
1144 ret = temp;
1145 }
1146 ret[len++] = *p++;
1147 }
1148 if (len >= max) {
1149 temp = xmlSaveUriRealloc(ret, &max);
1150 if (temp == NULL) goto mem_error;
1151 ret = temp;
1152 }
1153 ret[len++] = ':';
1154 }
1155 if (uri->opaque != NULL) {
1156 p = uri->opaque;
1157 while (*p != 0) {
1158 if (len + 3 >= max) {
1159 temp = xmlSaveUriRealloc(ret, &max);
1160 if (temp == NULL) goto mem_error;
1161 ret = temp;
1162 }
1163 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1164 ret[len++] = *p++;
1165 else {
1166 int val = *(unsigned char *)p++;
1167 int hi = val / 0x10, lo = val % 0x10;
1168 ret[len++] = '%';
1169 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1170 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1171 }
1172 }
1173 } else {
1174 if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1175 if (len + 3 >= max) {
1176 temp = xmlSaveUriRealloc(ret, &max);
1177 if (temp == NULL) goto mem_error;
1178 ret = temp;
1179 }
1180 ret[len++] = '/';
1181 ret[len++] = '/';
1182 if (uri->user != NULL) {
1183 p = uri->user;
1184 while (*p != 0) {
1185 if (len + 3 >= max) {
1186 temp = xmlSaveUriRealloc(ret, &max);
1187 if (temp == NULL) goto mem_error;
1188 ret = temp;
1189 }
1190 if ((IS_UNRESERVED(*(p))) ||
1191 ((*(p) == ';')) || ((*(p) == ':')) ||
1192 ((*(p) == '&')) || ((*(p) == '=')) ||
1193 ((*(p) == '+')) || ((*(p) == '$')) ||
1194 ((*(p) == ',')))
1195 ret[len++] = *p++;
1196 else {
1197 int val = *(unsigned char *)p++;
1198 int hi = val / 0x10, lo = val % 0x10;
1199 ret[len++] = '%';
1200 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1201 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1202 }
1203 }
1204 if (len + 3 >= max) {
1205 temp = xmlSaveUriRealloc(ret, &max);
1206 if (temp == NULL) goto mem_error;
1207 ret = temp;
1208 }
1209 ret[len++] = '@';
1210 }
1211 if (uri->server != NULL) {
1212 p = uri->server;
1213 while (*p != 0) {
1214 if (len >= max) {
1215 temp = xmlSaveUriRealloc(ret, &max);
1216 if (temp == NULL) goto mem_error;
1217 ret = temp;
1218 }
1219 /* TODO: escaping? */
1220 ret[len++] = (xmlChar) *p++;
1221 }
1222 }
1223 if (uri->port > 0) {
1224 if (len + 10 >= max) {
1225 temp = xmlSaveUriRealloc(ret, &max);
1226 if (temp == NULL) goto mem_error;
1227 ret = temp;
1228 }
1229 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1230 }
1231 } else if (uri->authority != NULL) {
1232 if (len + 3 >= max) {
1233 temp = xmlSaveUriRealloc(ret, &max);
1234 if (temp == NULL) goto mem_error;
1235 ret = temp;
1236 }
1237 ret[len++] = '/';
1238 ret[len++] = '/';
1239 p = uri->authority;
1240 while (*p != 0) {
1241 if (len + 3 >= max) {
1242 temp = xmlSaveUriRealloc(ret, &max);
1243 if (temp == NULL) goto mem_error;
1244 ret = temp;
1245 }
1246 if ((IS_UNRESERVED(*(p))) ||
1247 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1248 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1249 ((*(p) == '=')) || ((*(p) == '+')))
1250 ret[len++] = *p++;
1251 else {
1252 int val = *(unsigned char *)p++;
1253 int hi = val / 0x10, lo = val % 0x10;
1254 ret[len++] = '%';
1255 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1256 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1257 }
1258 }
1259 } else if (uri->scheme != NULL) {
1260 if (len + 3 >= max) {
1261 temp = xmlSaveUriRealloc(ret, &max);
1262 if (temp == NULL) goto mem_error;
1263 ret = temp;
1264 }
1265 }
1266 if (uri->path != NULL) {
1267 p = uri->path;
1268 /*
1269 * the colon in file:///d: should not be escaped or
1270 * Windows accesses fail later.
1271 */
1272 if ((uri->scheme != NULL) &&
1273 (p[0] == '/') &&
1274 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1275 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1276 (p[2] == ':') &&
1277 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1278 if (len + 3 >= max) {
1279 temp = xmlSaveUriRealloc(ret, &max);
1280 if (temp == NULL) goto mem_error;
1281 ret = temp;
1282 }
1283 ret[len++] = *p++;
1284 ret[len++] = *p++;
1285 ret[len++] = *p++;
1286 }
1287 while (*p != 0) {
1288 if (len + 3 >= max) {
1289 temp = xmlSaveUriRealloc(ret, &max);
1290 if (temp == NULL) goto mem_error;
1291 ret = temp;
1292 }
1293 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1294 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1295 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1296 ((*(p) == ',')))
1297 ret[len++] = *p++;
1298 else {
1299 int val = *(unsigned char *)p++;
1300 int hi = val / 0x10, lo = val % 0x10;
1301 ret[len++] = '%';
1302 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1303 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1304 }
1305 }
1306 }
1307 if (uri->query_raw != NULL) {
1308 if (len + 1 >= max) {
1309 temp = xmlSaveUriRealloc(ret, &max);
1310 if (temp == NULL) goto mem_error;
1311 ret = temp;
1312 }
1313 ret[len++] = '?';
1314 p = uri->query_raw;
1315 while (*p != 0) {
1316 if (len + 1 >= max) {
1317 temp = xmlSaveUriRealloc(ret, &max);
1318 if (temp == NULL) goto mem_error;
1319 ret = temp;
1320 }
1321 ret[len++] = *p++;
1322 }
1323 } else if (uri->query != NULL) {
1324 if (len + 3 >= max) {
1325 temp = xmlSaveUriRealloc(ret, &max);
1326 if (temp == NULL) goto mem_error;
1327 ret = temp;
1328 }
1329 ret[len++] = '?';
1330 p = uri->query;
1331 while (*p != 0) {
1332 if (len + 3 >= max) {
1333 temp = xmlSaveUriRealloc(ret, &max);
1334 if (temp == NULL) goto mem_error;
1335 ret = temp;
1336 }
1337 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1338 ret[len++] = *p++;
1339 else {
1340 int val = *(unsigned char *)p++;
1341 int hi = val / 0x10, lo = val % 0x10;
1342 ret[len++] = '%';
1343 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1344 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1345 }
1346 }
1347 }
1348 }
1349 if (uri->fragment != NULL) {
1350 if (len + 3 >= max) {
1351 temp = xmlSaveUriRealloc(ret, &max);
1352 if (temp == NULL) goto mem_error;
1353 ret = temp;
1354 }
1355 ret[len++] = '#';
1356 p = uri->fragment;
1357 while (*p != 0) {
1358 if (len + 3 >= max) {
1359 temp = xmlSaveUriRealloc(ret, &max);
1360 if (temp == NULL) goto mem_error;
1361 ret = temp;
1362 }
1363 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1364 ret[len++] = *p++;
1365 else {
1366 int val = *(unsigned char *)p++;
1367 int hi = val / 0x10, lo = val % 0x10;
1368 ret[len++] = '%';
1369 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1370 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1371 }
1372 }
1373 }
1374 if (len >= max) {
1375 temp = xmlSaveUriRealloc(ret, &max);
1376 if (temp == NULL) goto mem_error;
1377 ret = temp;
1378 }
1379 ret[len] = 0;
1380 return(ret);
1381
1382 mem_error:
1383 xmlFree(ret);
1384 return(NULL);
1385 }
1386
1387 /**
1388 * xmlPrintURI:
1389 * @stream: a FILE* for the output
1390 * @uri: pointer to an xmlURI
1391 *
1392 * Prints the URI in the stream @stream.
1393 */
1394 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1395 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1396 xmlChar *out;
1397
1398 out = xmlSaveUri(uri);
1399 if (out != NULL) {
1400 fprintf(stream, "%s", (char *) out);
1401 xmlFree(out);
1402 }
1403 }
1404
1405 /**
1406 * xmlCleanURI:
1407 * @uri: pointer to an xmlURI
1408 *
1409 * Make sure the xmlURI struct is free of content
1410 */
1411 static void
xmlCleanURI(xmlURIPtr uri)1412 xmlCleanURI(xmlURIPtr uri) {
1413 if (uri == NULL) return;
1414
1415 if (uri->scheme != NULL) xmlFree(uri->scheme);
1416 uri->scheme = NULL;
1417 if (uri->server != NULL) xmlFree(uri->server);
1418 uri->server = NULL;
1419 if (uri->user != NULL) xmlFree(uri->user);
1420 uri->user = NULL;
1421 if (uri->path != NULL) xmlFree(uri->path);
1422 uri->path = NULL;
1423 if (uri->fragment != NULL) xmlFree(uri->fragment);
1424 uri->fragment = NULL;
1425 if (uri->opaque != NULL) xmlFree(uri->opaque);
1426 uri->opaque = NULL;
1427 if (uri->authority != NULL) xmlFree(uri->authority);
1428 uri->authority = NULL;
1429 if (uri->query != NULL) xmlFree(uri->query);
1430 uri->query = NULL;
1431 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1432 uri->query_raw = NULL;
1433 }
1434
1435 /**
1436 * xmlFreeURI:
1437 * @uri: pointer to an xmlURI
1438 *
1439 * Free up the xmlURI struct
1440 */
1441 void
xmlFreeURI(xmlURIPtr uri)1442 xmlFreeURI(xmlURIPtr uri) {
1443 if (uri == NULL) return;
1444
1445 if (uri->scheme != NULL) xmlFree(uri->scheme);
1446 if (uri->server != NULL) xmlFree(uri->server);
1447 if (uri->user != NULL) xmlFree(uri->user);
1448 if (uri->path != NULL) xmlFree(uri->path);
1449 if (uri->fragment != NULL) xmlFree(uri->fragment);
1450 if (uri->opaque != NULL) xmlFree(uri->opaque);
1451 if (uri->authority != NULL) xmlFree(uri->authority);
1452 if (uri->query != NULL) xmlFree(uri->query);
1453 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1454 xmlFree(uri);
1455 }
1456
1457 /************************************************************************
1458 * *
1459 * Helper functions *
1460 * *
1461 ************************************************************************/
1462
/*
 * xmlIsPathSeparator:
 * @c:  the character to test
 * @isFile:  true for filesystem paths, false for URIs
 *
 * '/' always counts as a separator. When built for _WIN32, a backslash
 * is accepted as well, but only for filesystem paths.
 *
 * Returns 1 if @c is a path separator, 0 otherwise.
 */
static int
xmlIsPathSeparator(int c, int isFile) {
#ifdef _WIN32
    if (isFile && (c == '\\'))
        return(1);
#else
    /* Only the Windows build looks at this parameter. */
    (void) isFile;
#endif

    return(c == '/');
}
1477
/**
 * xmlNormalizePath:
 * @path:  pointer to the path string, rewritten in place
 * @isFile:  true for filesystem paths, false for URIs
 *
 * Normalize a filesystem path or URI path directly inside @path:
 * repeated separators are collapsed, "." segments are dropped and a
 * ".." segment removes the segment written before it when there is one.
 * Separators are always written back as '/'.
 *
 * Returns 0 on success (an empty string is left as is), -1 if @path
 * is NULL.
 */
static int
xmlNormalizePath(char *path, int isFile) {
    char *src, *dst;
    /* Number of segments already written that a ".." may still remove. */
    int depth = 0;

    if (path == NULL)
        return(-1);
    if (path[0] == 0)
        return(0);

    src = path;
    dst = path;

    /* A leading separator is kept, normalized to '/'. */
    if (xmlIsPathSeparator(*src, isFile)) {
        src++;
        *dst++ = '/';
    }

    while (*src != 0) {
        /*
         * Here the output is either empty or ends with a separator,
         * so any further separators are redundant and get skipped.
         */
        while (xmlIsPathSeparator(*src, isFile)) {
#ifdef _WIN32
            /* Allow two separators at start of path */
            if ((isFile) && (dst == path + 1))
                *dst++ = '/';
#endif
            src++;
        }

        if (*src == '.') {
            /* A trailing "." contributes nothing. */
            if (src[1] == 0)
                break;

            /* "./" is skipped entirely. */
            if (xmlIsPathSeparator(src[1], isFile)) {
                src += 2;
                continue;
            }

            if ((src[1] == '.') &&
                ((src[2] == 0) || xmlIsPathSeparator(src[2], isFile))) {
                int atEnd = (src[2] == 0);

                if (depth > 0) {
                    /* ".." cancels the segment written last. */
                    do {
                        dst--;
                    } while ((dst > path) &&
                             !xmlIsPathSeparator(dst[-1], isFile));
                    depth--;

                    if (atEnd)
                        break;
                    src += 3;
                    continue;
                }

                /*
                 * Ignore extraneous ".." in absolute paths.
                 * NOTE(review): dst[0] is the byte at the current write
                 * position, i.e. input that has not been overwritten
                 * yet, not the last byte written. Confirm this is the
                 * intended test before relying on it.
                 */
                if (dst[0] == '/') {
                    if (atEnd)
                        break;
                    src += 3;
                    continue;
                }

                /*
                 * Keep "../" at the start of a relative path. The
                 * decrement offsets the increment done after the copy
                 * below, so the kept ".." is never counted as removable.
                 */
                depth--;
            }
        }

        /* Copy one segment up to, but not including, its separator. */
        while ((*src != 0) && !xmlIsPathSeparator(*src, isFile)) {
            *dst++ = *src++;
        }

        /* Copy the separator that ended the segment, if any. */
        if (*src != 0) {
            src++;
            *dst++ = '/';
        }

        depth++;
    }

    /* Keep "." if output is empty and it's a file */
    if ((isFile) && (dst <= path))
        *dst++ = '.';
    *dst = 0;

    return(0);
}
1576
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string, that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * The work is delegated to xmlNormalizePath() in URI mode. The string is
 * normalized in place, no new allocation is done.
 *
 * Returns 0 or an error code
 */
int
xmlNormalizeURIPath(char *path) {
    int status;

    /* Second argument 0: @path is a URI path, not a filesystem path. */
    status = xmlNormalizePath(path, 0);

    return(status);
}
1592
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1600
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI.
 * Every "%XX" sequence with two hexadecimal digits is replaced by the
 * byte it encodes (no character encoding is applied); everything else,
 * including a '%' not followed by two hex digits, is copied unchanged.
 * The output is never longer than the input and is NUL-terminated.
 *
 * When @target is NULL a new buffer is allocated. Otherwise the result
 * is written to @target, which needs room for the processed length plus
 * the terminating NUL.
 *
 * Returns the unescaped string (@target if it was supplied, else a new
 * allocation), or NULL if @str is NULL or on allocation failure.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *buffer, *dst;
    const char *src;
    int remaining;

    if (str == NULL)
        return(NULL);

    remaining = len;
    if (remaining <= 0)
        remaining = strlen(str);
    /* Still negative here means the length did not fit into an int. */
    if (remaining < 0)
        return(NULL);

    if (target != NULL) {
        buffer = target;
    } else {
        buffer = (char *) xmlMallocAtomic(remaining + 1);
        if (buffer == NULL)
            return(NULL);
    }

    src = str;
    dst = buffer;
    while (remaining > 0) {
        if ((remaining > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* Both digits were validated by is_hex() above. */
            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else
                    value += (digit - 'A') + 10;
            }
            src += 3;
            remaining -= 3;
            /* Explicit sign change */
            *dst++ = (char) value;
        } else {
            *dst++ = *src++;
            remaining--;
        }
    }
    *dst = 0;

    return(buffer);
}
1662
1663 /**
1664 * xmlURIEscapeStr:
1665 * @str: string to escape
1666 * @list: exception list string of chars not to escape
1667 *
1668 * This routine escapes a string to hex, ignoring unreserved characters
1669 * a-z, A-Z, 0-9, "-._~", a few sub-delims "!*'()", the gen-delim "@"
1670 * (why?) and the characters in the exception list.
1671 *
1672 * Returns a new escaped string or NULL in case of error.
1673 */
1674 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1675 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1676 xmlChar *ret, ch;
1677 xmlChar *temp;
1678 const xmlChar *in;
1679 int len, out;
1680
1681 if (str == NULL)
1682 return(NULL);
1683 if (str[0] == 0)
1684 return(xmlStrdup(str));
1685 len = xmlStrlen(str);
1686
1687 len += 20;
1688 ret = (xmlChar *) xmlMallocAtomic(len);
1689 if (ret == NULL)
1690 return(NULL);
1691 in = (const xmlChar *) str;
1692 out = 0;
1693 while(*in != 0) {
1694 if (len - out <= 3) {
1695 if (len > INT_MAX / 2)
1696 return(NULL);
1697 temp = xmlRealloc(ret, len * 2);
1698 if (temp == NULL) {
1699 xmlFree(ret);
1700 return(NULL);
1701 }
1702 ret = temp;
1703 len *= 2;
1704 }
1705
1706 ch = *in;
1707
1708 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1709 unsigned char val;
1710 ret[out++] = '%';
1711 val = ch >> 4;
1712 if (val <= 9)
1713 ret[out++] = '0' + val;
1714 else
1715 ret[out++] = 'A' + val - 0xA;
1716 val = ch & 0xF;
1717 if (val <= 9)
1718 ret[out++] = '0' + val;
1719 else
1720 ret[out++] = 'A' + val - 0xA;
1721 in++;
1722 } else {
1723 ret[out++] = *in++;
1724 }
1725
1726 }
1727 ret[out] = 0;
1728 return(ret);
1729 }
1730
1731 /**
1732 * xmlURIEscape:
1733 * @str: the string of the URI to escape
1734 *
1735 * Escaping routine, does not do validity checks !
1736 * It will try to escape the chars needing this, but this is heuristic
1737 * based it's impossible to be sure.
1738 *
1739 * Returns an copy of the string, but escaped
1740 *
1741 * 25 May 2001
1742 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1743 * according to RFC2396.
1744 * - Carl Douglas
1745 */
1746 xmlChar *
xmlURIEscape(const xmlChar * str)1747 xmlURIEscape(const xmlChar * str)
1748 {
1749 xmlChar *ret, *segment = NULL;
1750 xmlURIPtr uri;
1751 int ret2;
1752
1753 if (str == NULL)
1754 return (NULL);
1755
1756 uri = xmlCreateURI();
1757 if (uri != NULL) {
1758 /*
1759 * Allow escaping errors in the unescaped form
1760 */
1761 uri->cleanup = XML_URI_ALLOW_UNWISE;
1762 ret2 = xmlParseURIReference(uri, (const char *)str);
1763 if (ret2) {
1764 xmlFreeURI(uri);
1765 return (NULL);
1766 }
1767 }
1768
1769 if (!uri)
1770 return NULL;
1771
1772 ret = NULL;
1773
1774 #define NULLCHK(p) if(!p) { \
1775 xmlFreeURI(uri); \
1776 xmlFree(ret); \
1777 return NULL; } \
1778
1779 if (uri->scheme) {
1780 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1781 NULLCHK(segment)
1782 ret = xmlStrcat(ret, segment);
1783 ret = xmlStrcat(ret, BAD_CAST ":");
1784 xmlFree(segment);
1785 }
1786
1787 if (uri->authority) {
1788 segment =
1789 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1790 NULLCHK(segment)
1791 ret = xmlStrcat(ret, BAD_CAST "//");
1792 ret = xmlStrcat(ret, segment);
1793 xmlFree(segment);
1794 }
1795
1796 if (uri->user) {
1797 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1798 NULLCHK(segment)
1799 ret = xmlStrcat(ret,BAD_CAST "//");
1800 ret = xmlStrcat(ret, segment);
1801 ret = xmlStrcat(ret, BAD_CAST "@");
1802 xmlFree(segment);
1803 }
1804
1805 if (uri->server) {
1806 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1807 NULLCHK(segment)
1808 if (uri->user == NULL)
1809 ret = xmlStrcat(ret, BAD_CAST "//");
1810 ret = xmlStrcat(ret, segment);
1811 xmlFree(segment);
1812 }
1813
1814 if (uri->port > 0) {
1815 xmlChar port[11];
1816
1817 snprintf((char *) port, 11, "%d", uri->port);
1818 ret = xmlStrcat(ret, BAD_CAST ":");
1819 ret = xmlStrcat(ret, port);
1820 }
1821
1822 if (uri->path) {
1823 segment =
1824 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1825 NULLCHK(segment)
1826 ret = xmlStrcat(ret, segment);
1827 xmlFree(segment);
1828 }
1829
1830 if (uri->query_raw) {
1831 ret = xmlStrcat(ret, BAD_CAST "?");
1832 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1833 }
1834 else if (uri->query) {
1835 segment =
1836 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1837 NULLCHK(segment)
1838 ret = xmlStrcat(ret, BAD_CAST "?");
1839 ret = xmlStrcat(ret, segment);
1840 xmlFree(segment);
1841 }
1842
1843 if (uri->opaque) {
1844 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1845 NULLCHK(segment)
1846 ret = xmlStrcat(ret, segment);
1847 xmlFree(segment);
1848 }
1849
1850 if (uri->fragment) {
1851 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1852 NULLCHK(segment)
1853 ret = xmlStrcat(ret, BAD_CAST "#");
1854 ret = xmlStrcat(ret, segment);
1855 xmlFree(segment);
1856 }
1857
1858 xmlFreeURI(uri);
1859 #undef NULLCHK
1860
1861 return (ret);
1862 }
1863
1864 /************************************************************************
1865 * *
1866 * Public functions *
1867 * *
1868 ************************************************************************/
1869
1870 static int
xmlIsAbsolutePath(const xmlChar * path)1871 xmlIsAbsolutePath(const xmlChar *path) {
1872 int c = path[0];
1873
1874 if (xmlIsPathSeparator(c, 1))
1875 return(1);
1876
1877 #ifdef _WIN32
1878 if ((((c >= 'A') && (c <= 'Z')) ||
1879 ((c >= 'a') && (c <= 'z'))) &&
1880 (path[1] == ':'))
1881 return(1);
1882 #endif
1883
1884 return(0);
1885 }
1886
1887 /**
1888 * xmlResolvePath:
1889 * @ref: the filesystem path
1890 * @base: the base value
1891 * @out: pointer to result URI
1892 *
1893 * Resolves a filesystem path from a base path.
1894 *
1895 * Returns 0 on success, -1 if a memory allocation failed or an error
1896 * code if URI or base are invalid.
1897 */
1898 static int
xmlResolvePath(const xmlChar * escRef,const xmlChar * base,xmlChar ** out)1899 xmlResolvePath(const xmlChar *escRef, const xmlChar *base, xmlChar **out) {
1900 const xmlChar *fragment;
1901 xmlChar *tmp = NULL;
1902 xmlChar *ref = NULL;
1903 xmlChar *result = NULL;
1904 int ret = -1;
1905 int i;
1906
1907 if (out == NULL)
1908 return(1);
1909 *out = NULL;
1910
1911 if ((escRef == NULL) || (escRef[0] == 0)) {
1912 if ((base == NULL) || (base[0] == 0))
1913 return(1);
1914 ref = xmlStrdup(base);
1915 if (ref == NULL)
1916 goto err_memory;
1917 *out = ref;
1918 return(0);
1919 }
1920
1921 /*
1922 * If a URI is resolved, we can assume it is a valid URI and not
1923 * a filesystem path. This means we have to unescape the part
1924 * before the fragment.
1925 */
1926 fragment = xmlStrchr(escRef, '#');
1927 if (fragment != NULL) {
1928 tmp = xmlStrndup(escRef, fragment - escRef);
1929 if (tmp == NULL)
1930 goto err_memory;
1931 escRef = tmp;
1932 }
1933
1934 ref = (xmlChar *) xmlURIUnescapeString((char *) escRef, -1, NULL);
1935 if (ref == NULL)
1936 goto err_memory;
1937
1938 if ((base == NULL) || (base[0] == 0))
1939 goto done;
1940
1941 if (xmlIsAbsolutePath(ref))
1942 goto done;
1943
1944 /*
1945 * Remove last segment from base
1946 */
1947 i = xmlStrlen(base);
1948 while ((i > 0) && !xmlIsPathSeparator(base[i-1], 1))
1949 i--;
1950
1951 /*
1952 * Concatenate base and ref
1953 */
1954 if (i > 0) {
1955 int refLen = xmlStrlen(ref);
1956
1957 result = xmlMalloc(i + refLen + 1);
1958 if (result == NULL)
1959 goto err_memory;
1960
1961 memcpy(result, base, i);
1962 memcpy(result + i, ref, refLen + 1);
1963 }
1964
1965 /*
1966 * Normalize
1967 */
1968 xmlNormalizePath((char *) result, 1);
1969
1970 done:
1971 if (result == NULL) {
1972 result = ref;
1973 ref = NULL;
1974 }
1975
1976 if (fragment != NULL) {
1977 result = xmlStrcat(result, fragment);
1978 if (result == NULL)
1979 goto err_memory;
1980 }
1981
1982 *out = result;
1983 ret = 0;
1984
1985 err_memory:
1986 xmlFree(tmp);
1987 xmlFree(ref);
1988 return(ret);
1989 }
1990
1991 /**
1992 * xmlBulidURISafe:
1993 * @URI: the URI instance found in the document
1994 * @base: the base value
1995 * @valPtr: pointer to result URI
1996 *
1997 * Computes he final URI of the reference done by checking that
1998 * the given URI is valid, and building the final URI using the
1999 * base URI. This is processed according to section 5.2 of the
2000 * RFC 2396
2001 *
2002 * 5.2. Resolving Relative References to Absolute Form
2003 *
2004 * Returns 0 on success, -1 if a memory allocation failed or an error
2005 * code if URI or base are invalid.
2006 */
2007 int
xmlBuildURISafe(const xmlChar * URI,const xmlChar * base,xmlChar ** valPtr)2008 xmlBuildURISafe(const xmlChar *URI, const xmlChar *base, xmlChar **valPtr) {
2009 xmlChar *val = NULL;
2010 int ret, len, indx, cur, out;
2011 xmlURIPtr ref = NULL;
2012 xmlURIPtr bas = NULL;
2013 xmlURIPtr res = NULL;
2014
2015 /*
2016 * 1) The URI reference is parsed into the potential four components and
2017 * fragment identifier, as described in Section 4.3.
2018 *
2019 * NOTE that a completely empty URI is treated by modern browsers
2020 * as a reference to "." rather than as a synonym for the current
2021 * URI. Should we do that here?
2022 */
2023 if (URI == NULL)
2024 ret = 1;
2025 else if (URI[0] != 0)
2026 ret = xmlParseURISafe((const char *) URI, &ref);
2027 else
2028 ret = 0;
2029 if (ret != 0)
2030 goto done;
2031 if ((ref != NULL) && (ref->scheme != NULL)) {
2032 /*
2033 * The URI is absolute don't modify.
2034 */
2035 val = xmlStrdup(URI);
2036 if (val == NULL)
2037 ret = -1;
2038 goto done;
2039 }
2040
2041 /*
2042 * If base has no scheme or authority, it is assumed to be a
2043 * filesystem path.
2044 */
2045 if (xmlStrstr(base, BAD_CAST "://") == NULL) {
2046 xmlFreeURI(ref);
2047 return(xmlResolvePath(URI, base, valPtr));
2048 }
2049
2050 ret = xmlParseURISafe((const char *) base, &bas);
2051 if (ret < 0)
2052 goto done;
2053 if (ret != 0) {
2054 if (ref) {
2055 ret = 0;
2056 val = xmlSaveUri(ref);
2057 if (val == NULL)
2058 ret = -1;
2059 }
2060 goto done;
2061 }
2062 if (ref == NULL) {
2063 /*
2064 * the base fragment must be ignored
2065 */
2066 if (bas->fragment != NULL) {
2067 xmlFree(bas->fragment);
2068 bas->fragment = NULL;
2069 }
2070 val = xmlSaveUri(bas);
2071 if (val == NULL)
2072 ret = -1;
2073 goto done;
2074 }
2075
2076 /*
2077 * 2) If the path component is empty and the scheme, authority, and
2078 * query components are undefined, then it is a reference to the
2079 * current document and we are done. Otherwise, the reference URI's
2080 * query and fragment components are defined as found (or not found)
2081 * within the URI reference and not inherited from the base URI.
2082 *
2083 * NOTE that in modern browsers, the parsing differs from the above
2084 * in the following aspect: the query component is allowed to be
2085 * defined while still treating this as a reference to the current
2086 * document.
2087 */
2088 ret = -1;
2089 res = xmlCreateURI();
2090 if (res == NULL)
2091 goto done;
2092 if ((ref->scheme == NULL) && (ref->path == NULL) &&
2093 ((ref->authority == NULL) && (ref->server == NULL) &&
2094 (ref->port == PORT_EMPTY))) {
2095 if (bas->scheme != NULL) {
2096 res->scheme = xmlMemStrdup(bas->scheme);
2097 if (res->scheme == NULL)
2098 goto done;
2099 }
2100 if (bas->authority != NULL) {
2101 res->authority = xmlMemStrdup(bas->authority);
2102 if (res->authority == NULL)
2103 goto done;
2104 } else {
2105 if (bas->server != NULL) {
2106 res->server = xmlMemStrdup(bas->server);
2107 if (res->server == NULL)
2108 goto done;
2109 }
2110 if (bas->user != NULL) {
2111 res->user = xmlMemStrdup(bas->user);
2112 if (res->user == NULL)
2113 goto done;
2114 }
2115 res->port = bas->port;
2116 }
2117 if (bas->path != NULL) {
2118 res->path = xmlMemStrdup(bas->path);
2119 if (res->path == NULL)
2120 goto done;
2121 }
2122 if (ref->query_raw != NULL) {
2123 res->query_raw = xmlMemStrdup (ref->query_raw);
2124 if (res->query_raw == NULL)
2125 goto done;
2126 } else if (ref->query != NULL) {
2127 res->query = xmlMemStrdup(ref->query);
2128 if (res->query == NULL)
2129 goto done;
2130 } else if (bas->query_raw != NULL) {
2131 res->query_raw = xmlMemStrdup(bas->query_raw);
2132 if (res->query_raw == NULL)
2133 goto done;
2134 } else if (bas->query != NULL) {
2135 res->query = xmlMemStrdup(bas->query);
2136 if (res->query == NULL)
2137 goto done;
2138 }
2139 if (ref->fragment != NULL) {
2140 res->fragment = xmlMemStrdup(ref->fragment);
2141 if (res->fragment == NULL)
2142 goto done;
2143 }
2144 goto step_7;
2145 }
2146
2147 /*
2148 * 3) If the scheme component is defined, indicating that the reference
2149 * starts with a scheme name, then the reference is interpreted as an
2150 * absolute URI and we are done. Otherwise, the reference URI's
2151 * scheme is inherited from the base URI's scheme component.
2152 */
2153 if (ref->scheme != NULL) {
2154 val = xmlSaveUri(ref);
2155 if (val != NULL)
2156 ret = 0;
2157 goto done;
2158 }
2159 if (bas->scheme != NULL) {
2160 res->scheme = xmlMemStrdup(bas->scheme);
2161 if (res->scheme == NULL)
2162 goto done;
2163 }
2164
2165 if (ref->query_raw != NULL) {
2166 res->query_raw = xmlMemStrdup(ref->query_raw);
2167 if (res->query_raw == NULL)
2168 goto done;
2169 } else if (ref->query != NULL) {
2170 res->query = xmlMemStrdup(ref->query);
2171 if (res->query == NULL)
2172 goto done;
2173 }
2174 if (ref->fragment != NULL) {
2175 res->fragment = xmlMemStrdup(ref->fragment);
2176 if (res->fragment == NULL)
2177 goto done;
2178 }
2179
2180 /*
2181 * 4) If the authority component is defined, then the reference is a
2182 * network-path and we skip to step 7. Otherwise, the reference
2183 * URI's authority is inherited from the base URI's authority
2184 * component, which will also be undefined if the URI scheme does not
2185 * use an authority component.
2186 */
2187 if ((ref->authority != NULL) || (ref->server != NULL) ||
2188 (ref->port != PORT_EMPTY)) {
2189 if (ref->authority != NULL) {
2190 res->authority = xmlMemStrdup(ref->authority);
2191 if (res->authority == NULL)
2192 goto done;
2193 } else {
2194 if (ref->server != NULL) {
2195 res->server = xmlMemStrdup(ref->server);
2196 if (res->server == NULL)
2197 goto done;
2198 }
2199 if (ref->user != NULL) {
2200 res->user = xmlMemStrdup(ref->user);
2201 if (res->user == NULL)
2202 goto done;
2203 }
2204 res->port = ref->port;
2205 }
2206 if (ref->path != NULL) {
2207 res->path = xmlMemStrdup(ref->path);
2208 if (res->path == NULL)
2209 goto done;
2210 }
2211 goto step_7;
2212 }
2213 if (bas->authority != NULL) {
2214 res->authority = xmlMemStrdup(bas->authority);
2215 if (res->authority == NULL)
2216 goto done;
2217 } else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2218 if (bas->server != NULL) {
2219 res->server = xmlMemStrdup(bas->server);
2220 if (res->server == NULL)
2221 goto done;
2222 }
2223 if (bas->user != NULL) {
2224 res->user = xmlMemStrdup(bas->user);
2225 if (res->user == NULL)
2226 goto done;
2227 }
2228 res->port = bas->port;
2229 }
2230
2231 /*
2232 * 5) If the path component begins with a slash character ("/"), then
2233 * the reference is an absolute-path and we skip to step 7.
2234 */
2235 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2236 res->path = xmlMemStrdup(ref->path);
2237 if (res->path == NULL)
2238 goto done;
2239 goto step_7;
2240 }
2241
2242
2243 /*
2244 * 6) If this step is reached, then we are resolving a relative-path
2245 * reference. The relative path needs to be merged with the base
2246 * URI's path. Although there are many ways to do this, we will
2247 * describe a simple method using a separate string buffer.
2248 *
2249 * Allocate a buffer large enough for the result string.
2250 */
2251 len = 2; /* extra / and 0 */
2252 if (ref->path != NULL)
2253 len += strlen(ref->path);
2254 if (bas->path != NULL)
2255 len += strlen(bas->path);
2256 res->path = (char *) xmlMallocAtomic(len);
2257 if (res->path == NULL)
2258 goto done;
2259 res->path[0] = 0;
2260
2261 /*
2262 * a) All but the last segment of the base URI's path component is
2263 * copied to the buffer. In other words, any characters after the
2264 * last (right-most) slash character, if any, are excluded.
2265 */
2266 cur = 0;
2267 out = 0;
2268 if (bas->path != NULL) {
2269 while (bas->path[cur] != 0) {
2270 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2271 cur++;
2272 if (bas->path[cur] == 0)
2273 break;
2274
2275 cur++;
2276 while (out < cur) {
2277 res->path[out] = bas->path[out];
2278 out++;
2279 }
2280 }
2281 }
2282 res->path[out] = 0;
2283
2284 /*
2285 * b) The reference's path component is appended to the buffer
2286 * string.
2287 */
2288 if (ref->path != NULL && ref->path[0] != 0) {
2289 indx = 0;
2290 /*
2291 * Ensure the path includes a '/'
2292 */
2293 if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2294 res->path[out++] = '/';
2295 while (ref->path[indx] != 0) {
2296 res->path[out++] = ref->path[indx++];
2297 }
2298 }
2299 res->path[out] = 0;
2300
2301 /*
2302 * Steps c) to h) are really path normalization steps
2303 */
2304 xmlNormalizeURIPath(res->path);
2305
2306 step_7:
2307
2308 /*
2309 * 7) The resulting URI components, including any inherited from the
2310 * base URI, are recombined to give the absolute form of the URI
2311 * reference.
2312 */
2313 val = xmlSaveUri(res);
2314 if (val != NULL)
2315 ret = 0;
2316
2317 done:
2318 if (ref != NULL)
2319 xmlFreeURI(ref);
2320 if (bas != NULL)
2321 xmlFreeURI(bas);
2322 if (res != NULL)
2323 xmlFreeURI(res);
2324 *valPtr = val;
2325 return(ret);
2326 }
2327
2328 /**
2329 * xmlBuildURI:
2330 * @URI: the URI instance found in the document
2331 * @base: the base value
2332 *
2333 * Computes he final URI of the reference done by checking that
2334 * the given URI is valid, and building the final URI using the
2335 * base URI. This is processed according to section 5.2 of the
2336 * RFC 2396
2337 *
2338 * 5.2. Resolving Relative References to Absolute Form
2339 *
2340 * Returns a new URI string (to be freed by the caller) or NULL in case
2341 * of error.
2342 */
2343 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)2344 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
2345 xmlChar *out;
2346
2347 xmlBuildURISafe(URI, base, &out);
2348 return(out);
2349 }
2350
2351 /**
2352 * xmlBuildRelativeURISafe:
2353 * @URI: the URI reference under consideration
2354 * @base: the base value
2355 * @valPtr: pointer to result URI
2356 *
2357 * Expresses the URI of the reference in terms relative to the
2358 * base. Some examples of this operation include:
2359 * base = "http://site1.com/docs/book1.html"
2360 * URI input URI returned
2361 * docs/pic1.gif pic1.gif
2362 * docs/img/pic1.gif img/pic1.gif
2363 * img/pic1.gif ../img/pic1.gif
2364 * http://site1.com/docs/pic1.gif pic1.gif
2365 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2366 *
2367 * base = "docs/book1.html"
2368 * URI input URI returned
2369 * docs/pic1.gif pic1.gif
2370 * docs/img/pic1.gif img/pic1.gif
2371 * img/pic1.gif ../img/pic1.gif
2372 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2373 *
2374 *
2375 * Note: if the URI reference is really weird or complicated, it may be
2376 * worthwhile to first convert it into a "nice" one by calling
2377 * xmlBuildURI (using 'base') before calling this routine,
2378 * since this routine (for reasonable efficiency) assumes URI has
2379 * already been through some validation.
2380 *
2381 * Returns 0 on success, -1 if a memory allocation failed or an error
2382 * code if URI or base are invalid.
2383 */
2384 int
xmlBuildRelativeURISafe(const xmlChar * URI,const xmlChar * base,xmlChar ** valPtr)2385 xmlBuildRelativeURISafe(const xmlChar * URI, const xmlChar * base,
2386 xmlChar **valPtr)
2387 {
2388 xmlChar *val = NULL;
2389 int ret = 0;
2390 int ix;
2391 int nbslash = 0;
2392 int len;
2393 xmlURIPtr ref = NULL;
2394 xmlURIPtr bas = NULL;
2395 xmlChar *bptr, *uptr, *vptr;
2396 int remove_path = 0;
2397
2398 if (valPtr == NULL)
2399 return(1);
2400 *valPtr = NULL;
2401 if ((URI == NULL) || (*URI == 0))
2402 return(1);
2403
2404 /*
2405 * First parse URI into a standard form
2406 */
2407 ref = xmlCreateURI ();
2408 if (ref == NULL) {
2409 ret = -1;
2410 goto done;
2411 }
2412 /* If URI not already in "relative" form */
2413 if (URI[0] != '.') {
2414 ret = xmlParseURIReference (ref, (const char *) URI);
2415 if (ret != 0)
2416 goto done; /* Error in URI, return NULL */
2417 } else {
2418 ref->path = (char *)xmlStrdup(URI);
2419 if (ref->path == NULL) {
2420 ret = -1;
2421 goto done;
2422 }
2423 }
2424
2425 /*
2426 * Next parse base into the same standard form
2427 */
2428 if ((base == NULL) || (*base == 0)) {
2429 val = xmlStrdup (URI);
2430 if (val == NULL)
2431 ret = -1;
2432 goto done;
2433 }
2434 bas = xmlCreateURI ();
2435 if (bas == NULL) {
2436 ret = -1;
2437 goto done;
2438 }
2439 if (base[0] != '.') {
2440 ret = xmlParseURIReference (bas, (const char *) base);
2441 if (ret != 0)
2442 goto done; /* Error in base, return NULL */
2443 } else {
2444 bas->path = (char *)xmlStrdup(base);
2445 if (bas->path == NULL) {
2446 ret = -1;
2447 goto done;
2448 }
2449 }
2450
2451 /*
2452 * If the scheme / server on the URI differs from the base,
2453 * just return the URI
2454 */
2455 if ((ref->scheme != NULL) &&
2456 ((bas->scheme == NULL) ||
2457 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2458 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2459 (bas->port != ref->port))) {
2460 val = xmlStrdup (URI);
2461 if (val == NULL)
2462 ret = -1;
2463 goto done;
2464 }
2465 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2466 val = xmlStrdup(BAD_CAST "");
2467 if (val == NULL)
2468 ret = -1;
2469 goto done;
2470 }
2471 if (bas->path == NULL) {
2472 val = xmlStrdup((xmlChar *)ref->path);
2473 if (val == NULL)
2474 ret = -1;
2475 goto done;
2476 }
2477 if (ref->path == NULL) {
2478 ref->path = (char *) "/";
2479 remove_path = 1;
2480 }
2481
2482 /*
2483 * At this point (at last!) we can compare the two paths
2484 *
2485 * First we take care of the special case where either of the
2486 * two path components may be missing (bug 316224)
2487 */
2488 bptr = (xmlChar *)bas->path;
2489 {
2490 xmlChar *rptr = (xmlChar *) ref->path;
2491 int pos = 0;
2492
2493 /*
2494 * Next we compare the two strings and find where they first differ
2495 */
2496 if ((*rptr == '.') && (rptr[1] == '/'))
2497 rptr += 2;
2498 if ((*bptr == '.') && (bptr[1] == '/'))
2499 bptr += 2;
2500 else if ((*bptr == '/') && (*rptr != '/'))
2501 bptr++;
2502 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2503 pos++;
2504
2505 if (bptr[pos] == rptr[pos]) {
2506 val = xmlStrdup(BAD_CAST "");
2507 if (val == NULL)
2508 ret = -1;
2509 goto done; /* (I can't imagine why anyone would do this) */
2510 }
2511
2512 /*
2513 * In URI, "back up" to the last '/' encountered. This will be the
2514 * beginning of the "unique" suffix of URI
2515 */
2516 ix = pos;
2517 for (; ix > 0; ix--) {
2518 if (rptr[ix - 1] == '/')
2519 break;
2520 }
2521 uptr = (xmlChar *)&rptr[ix];
2522
2523 /*
2524 * In base, count the number of '/' from the differing point
2525 */
2526 for (; bptr[ix] != 0; ix++) {
2527 if (bptr[ix] == '/')
2528 nbslash++;
2529 }
2530
2531 /*
2532 * e.g: URI="foo/" base="foo/bar" -> "./"
2533 */
2534 if (nbslash == 0 && !uptr[0]) {
2535 val = xmlStrdup(BAD_CAST "./");
2536 if (val == NULL)
2537 ret = -1;
2538 goto done;
2539 }
2540
2541 len = xmlStrlen (uptr) + 1;
2542 }
2543
2544 if (nbslash == 0) {
2545 if (uptr != NULL) {
2546 /* exception characters from xmlSaveUri */
2547 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2548 if (val == NULL)
2549 ret = -1;
2550 }
2551 goto done;
2552 }
2553
2554 /*
2555 * Allocate just enough space for the returned string -
2556 * length of the remainder of the URI, plus enough space
2557 * for the "../" groups, plus one for the terminator
2558 */
2559 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2560 if (val == NULL) {
2561 ret = -1;
2562 goto done;
2563 }
2564 vptr = val;
2565 /*
2566 * Put in as many "../" as needed
2567 */
2568 for (; nbslash>0; nbslash--) {
2569 *vptr++ = '.';
2570 *vptr++ = '.';
2571 *vptr++ = '/';
2572 }
2573 /*
2574 * Finish up with the end of the URI
2575 */
2576 if (uptr != NULL) {
2577 if ((vptr > val) && (len > 0) &&
2578 (uptr[0] == '/') && (vptr[-1] == '/')) {
2579 memcpy (vptr, uptr + 1, len - 1);
2580 vptr[len - 2] = 0;
2581 } else {
2582 memcpy (vptr, uptr, len);
2583 vptr[len - 1] = 0;
2584 }
2585 } else {
2586 vptr[len - 1] = 0;
2587 }
2588
2589 /* escape the freshly-built path */
2590 vptr = val;
2591 /* exception characters from xmlSaveUri */
2592 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2593 if (val == NULL)
2594 ret = -1;
2595 else
2596 ret = 0;
2597 xmlFree(vptr);
2598
2599 done:
2600 /*
2601 * Free the working variables
2602 */
2603 if (remove_path != 0)
2604 ref->path = NULL;
2605 if (ref != NULL)
2606 xmlFreeURI (ref);
2607 if (bas != NULL)
2608 xmlFreeURI (bas);
2609 if (ret != 0) {
2610 xmlFree(val);
2611 val = NULL;
2612 }
2613
2614 *valPtr = val;
2615 return(ret);
2616 }
2617
2618 /*
2619 * xmlBuildRelativeURI:
2620 * @URI: the URI reference under consideration
2621 * @base: the base value
2622 *
2623 * See xmlBuildRelativeURISafe.
2624 *
2625 * Returns a new URI string (to be freed by the caller) or NULL in case
2626 * error.
2627 */
2628 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2629 xmlBuildRelativeURI(const xmlChar * URI, const xmlChar * base)
2630 {
2631 xmlChar *val;
2632
2633 xmlBuildRelativeURISafe(URI, base, &val);
2634 return(val);
2635 }
2636
2637 /**
2638 * xmlCanonicPath:
2639 * @path: the resource locator in a filesystem notation
2640 *
2641 * Prepares a path.
2642 *
2643 * If the path contains the substring "://", it is considered a
2644 * Legacy Extended IRI. Characters which aren't allowed in URIs are
2645 * escaped.
2646 *
2647 * Otherwise, the path is considered a filesystem path which is
2648 * copied without modification.
2649 *
2650 * The caller is responsible for freeing the memory occupied
2651 * by the returned string. If there is insufficient memory available, or the
2652 * argument is NULL, the function returns NULL.
2653 */
2654 xmlChar *
xmlCanonicPath(const xmlChar * path)2655 xmlCanonicPath(const xmlChar *path)
2656 {
2657 xmlChar *ret;
2658
2659 if (path == NULL)
2660 return(NULL);
2661
2662 /* Check if this is an "absolute uri" */
2663 if (xmlStrstr(path, BAD_CAST "://") != NULL) {
2664 /*
2665 * Escape all characters except reserved, unreserved and the
2666 * percent sign.
2667 *
2668 * xmlURIEscapeStr already keeps unreserved characters, so we
2669 * pass gen-delims, sub-delims and "%" to ignore.
2670 */
2671 ret = xmlURIEscapeStr(path, BAD_CAST ":/?#[]@!$&()*+,;='%");
2672 } else {
2673 ret = xmlStrdup((const xmlChar *) path);
2674 }
2675
2676 return(ret);
2677 }
2678
2679 /**
2680 * xmlPathToURI:
2681 * @path: the resource locator in a filesystem notation
2682 *
2683 * Constructs an URI expressing the existing path
2684 *
2685 * Returns a new URI, or a duplicate of the path parameter if the
2686 * construction fails. The caller is responsible for freeing the memory
2687 * occupied by the returned string. If there is insufficient memory available,
2688 * or the argument is NULL, the function returns NULL.
2689 */
2690 xmlChar *
xmlPathToURI(const xmlChar * path)2691 xmlPathToURI(const xmlChar *path)
2692 {
2693 return(xmlCanonicPath(path));
2694 }
2695