xref: /aosp_15_r20/external/libxml2/uri.c (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * [email protected]
9  */
10 
11 #define IN_LIBXML
12 #include "libxml.h"
13 
14 #include <limits.h>
15 #include <string.h>
16 
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/xmlerror.h>
20 
21 #include "private/error.h"
22 
23 /**
24  * MAX_URI_LENGTH:
25  *
26  * The definition of the URI regexp in the above RFC has no size limit
27  * In practice they are usually relatively short except for the
28  * data URI scheme as defined in RFC 2397. Even for data URI the usual
29  * maximum size before hitting random practical limits is around 64 KB
30  * and 4KB is usually a maximum admitted limit for proper operations.
31  * The value below is more a security limit than anything else and
32  * really should never be hit by 'normal' operations
33  * Set to 1 MByte in 2012, this is only enforced on output
34  */
/* Parenthesized so the product stays a single operand in any expression. */
#define MAX_URI_LENGTH (1024 * 1024)
36 
37 #define PORT_EMPTY           0
38 #define PORT_EMPTY_SERVER   -1
39 
40 static void xmlCleanURI(xmlURIPtr uri);
41 
42 /*
43  * Old rule from 2396 used in legacy handling code
44  * alpha    = lowalpha | upalpha
45  */
46 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
47 
48 
49 /*
50  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
51  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
52  *            "u" | "v" | "w" | "x" | "y" | "z"
53  */
54 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
55 
56 /*
57  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
58  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
59  *           "U" | "V" | "W" | "X" | "Y" | "Z"
60  */
61 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
62 
63 #ifdef IS_DIGIT
64 #undef IS_DIGIT
65 #endif
66 /*
67  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
68  */
69 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
70 
71 /*
72  * alphanum = alpha | digit
73  */
74 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
75 
76 /*
77  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
78  */
79 
80 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
81     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
82     ((x) == '(') || ((x) == ')'))
83 
/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 * (the macro below additionally accepts "[" and "]")
 */
87 #define IS_UNWISE(p)                                                    \
88       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
89        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
90        ((*(p) == ']')) || ((*(p) == '`')))
91 
92 /*
93  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
94  *            "[" | "]"
95  */
96 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
97         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
98         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
99         ((x) == ']'))
100 
101 /*
102  * unreserved = alphanum | mark
103  */
104 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
105 
/*
 * Advance the pointer lvalue @p to the next character: by 3 when it points
 * at a '%' (start of an escape sequence), by 1 otherwise.
 * The macro does not check that two characters follow the '%'; callers
 * validate the escape (e.g. with ISA_PCT_ENCODED) before advancing.
 * The argument is parenthesized so that lvalue expressions such as *pp
 * advance the intended pointer.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
110 
111 /*
112  * Productions from the spec.
113  *
114  *    authority     = server | reg_name
115  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
116  *                        ";" | ":" | "@" | "&" | "=" | "+" )
117  *
118  * path          = [ abs_path | opaque_part ]
119  */
120 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
121 
122 /************************************************************************
123  *									*
124  *                         RFC 3986 parser				*
125  *									*
126  ************************************************************************/
127 
128 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
129 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
130                       ((*(p) >= 'A') && (*(p) <= 'Z')))
131 #define ISA_HEXDIG(p)							\
132        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
133         ((*(p) >= 'A') && (*(p) <= 'F')))
134 
135 /*
136  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
137  *                     / "*" / "+" / "," / ";" / "="
138  */
139 #define ISA_SUB_DELIM(p)						\
140       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
141        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
142        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
143        ((*(p) == '=')) || ((*(p) == '\'')))
144 
145 /*
146  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
147  */
148 #define ISA_GEN_DELIM(p)						\
149       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
150        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
151        ((*(p) == '@')))
152 
153 /*
154  *    reserved      = gen-delims / sub-delims
155  */
156 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
157 
158 /*
159  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
160  */
161 #define ISA_STRICTLY_UNRESERVED(p)					\
162       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
163        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
164 
165 /*
166  *    pct-encoded   = "%" HEXDIG HEXDIG
167  */
168 #define ISA_PCT_ENCODED(p)						\
169      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
170 
171 /*
172  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
173  */
174 #define ISA_PCHAR(u, p)							\
175      (ISA_UNRESERVED(u, p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
176       ((*(p) == ':')) || ((*(p) == '@')))
177 
178 /*
179  * From https://www.w3.org/TR/leiri/
180  *
181  * " " / "<" / ">" / '"' / "{" / "}" / "|"
182  * / "\" / "^" / "`" / %x0-1F / %x7F-D7FF
183  * / %xE000-FFFD / %x10000-10FFFF
184  */
185 #define ISA_UCSCHAR(p) \
186     ((*(p) <= 0x20) || (*(p) >= 0x7F) || (*(p) == '<') || (*(p) == '>') || \
187      (*(p) == '"')  || (*(p) == '{')  || (*(p) == '}') || (*(p) == '|') || \
188      (*(p) == '\\') || (*(p) == '^')  || (*(p) == '`'))
189 
190 #define ISA_UNRESERVED(u, p) (xmlIsUnreserved(u, p))
191 
192 #define XML_URI_ALLOW_UNWISE    1
193 #define XML_URI_NO_UNESCAPE     2
194 #define XML_URI_ALLOW_UCSCHAR   4
195 
196 static int
xmlIsUnreserved(xmlURIPtr uri,const char * cur)197 xmlIsUnreserved(xmlURIPtr uri, const char *cur) {
198     if (uri == NULL)
199         return(0);
200 
201     if (ISA_STRICTLY_UNRESERVED(cur))
202         return(1);
203 
204     if (uri->cleanup & XML_URI_ALLOW_UNWISE) {
205         if (IS_UNWISE(cur))
206             return(1);
207     } else if (uri->cleanup & XML_URI_ALLOW_UCSCHAR) {
208         if (ISA_UCSCHAR(cur))
209             return(1);
210     }
211 
212     return(0);
213 }
214 
215 /**
216  * xmlParse3986Scheme:
217  * @uri:  pointer to an URI structure
218  * @str:  pointer to the string to analyze
219  *
220  * Parse an URI scheme
221  *
222  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223  *
224  * Returns 0 or the error code
225  */
226 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)227 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
228     const char *cur;
229 
230     cur = *str;
231     if (!ISA_ALPHA(cur))
232 	return(1);
233     cur++;
234     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
235            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
236     if (uri != NULL) {
237 	if (uri->scheme != NULL) xmlFree(uri->scheme);
238 	uri->scheme = STRNDUP(*str, cur - *str);
239         if (uri->scheme == NULL)
240             return(-1);
241     }
242     *str = cur;
243     return(0);
244 }
245 
246 /**
247  * xmlParse3986Fragment:
248  * @uri:  pointer to an URI structure
249  * @str:  pointer to the string to analyze
250  *
251  * Parse the query part of an URI
252  *
253  * fragment      = *( pchar / "/" / "?" )
254  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
255  *       in the fragment identifier but this is used very broadly for
256  *       xpointer scheme selection, so we are allowing it here to not break
257  *       for example all the DocBook processing chains.
258  *
259  * Returns 0 or the error code
260  */
261 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)262 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
263 {
264     const char *cur;
265 
266     cur = *str;
267 
268     while ((ISA_PCHAR(uri, cur)) || (*cur == '/') || (*cur == '?') ||
269            (*cur == '[') || (*cur == ']'))
270         NEXT(cur);
271     if (uri != NULL) {
272         if (uri->fragment != NULL)
273             xmlFree(uri->fragment);
274 	if (uri->cleanup & XML_URI_NO_UNESCAPE)
275 	    uri->fragment = STRNDUP(*str, cur - *str);
276 	else
277 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
278         if (uri->fragment == NULL)
279             return (-1);
280     }
281     *str = cur;
282     return (0);
283 }
284 
285 /**
286  * xmlParse3986Query:
287  * @uri:  pointer to an URI structure
288  * @str:  pointer to the string to analyze
289  *
290  * Parse the query part of an URI
291  *
292  * query = *uric
293  *
294  * Returns 0 or the error code
295  */
296 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)297 xmlParse3986Query(xmlURIPtr uri, const char **str)
298 {
299     const char *cur;
300 
301     cur = *str;
302 
303     while ((ISA_PCHAR(uri, cur)) || (*cur == '/') || (*cur == '?'))
304         NEXT(cur);
305     if (uri != NULL) {
306         if (uri->query != NULL)
307             xmlFree(uri->query);
308 	if (uri->cleanup & XML_URI_NO_UNESCAPE)
309 	    uri->query = STRNDUP(*str, cur - *str);
310 	else
311 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
312         if (uri->query == NULL)
313             return (-1);
314 
315 	/* Save the raw bytes of the query as well.
316 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
317 	 */
318 	if (uri->query_raw != NULL)
319 	    xmlFree (uri->query_raw);
320 	uri->query_raw = STRNDUP (*str, cur - *str);
321         if (uri->query_raw == NULL)
322             return (-1);
323     }
324     *str = cur;
325     return (0);
326 }
327 
328 /**
329  * xmlParse3986Port:
330  * @uri:  pointer to an URI structure
331  * @str:  the string to analyze
332  *
333  * Parse a port part and fills in the appropriate fields
334  * of the @uri structure
335  *
336  * port          = *DIGIT
337  *
338  * Returns 0 or the error code
339  */
340 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)341 xmlParse3986Port(xmlURIPtr uri, const char **str)
342 {
343     const char *cur = *str;
344     int port = 0;
345 
346     if (ISA_DIGIT(cur)) {
347 	while (ISA_DIGIT(cur)) {
348             int digit = *cur - '0';
349 
350             if (port > INT_MAX / 10)
351                 return(1);
352             port *= 10;
353             if (port > INT_MAX - digit)
354                 return(1);
355 	    port += digit;
356 
357 	    cur++;
358 	}
359 	if (uri != NULL)
360 	    uri->port = port;
361 	*str = cur;
362 	return(0);
363     }
364     return(1);
365 }
366 
367 /**
368  * xmlParse3986Userinfo:
369  * @uri:  pointer to an URI structure
370  * @str:  the string to analyze
371  *
372  * Parse an user information part and fills in the appropriate fields
373  * of the @uri structure
374  *
375  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
376  *
377  * Returns 0 or the error code
378  */
379 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)380 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
381 {
382     const char *cur;
383 
384     cur = *str;
385     while (ISA_UNRESERVED(uri, cur) || ISA_PCT_ENCODED(cur) ||
386            ISA_SUB_DELIM(cur) || (*cur == ':'))
387 	NEXT(cur);
388     if (*cur == '@') {
389 	if (uri != NULL) {
390 	    if (uri->user != NULL) xmlFree(uri->user);
391 	    if (uri->cleanup & XML_URI_NO_UNESCAPE)
392 		uri->user = STRNDUP(*str, cur - *str);
393 	    else
394 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
395             if (uri->user == NULL)
396                 return(-1);
397 	}
398 	*str = cur;
399 	return(0);
400     }
401     return(1);
402 }
403 
/**
 * xmlParse3986DecOctet:
 * @str:  in/out cursor into the string being parsed
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; whatever follows
 * them is left for the caller to examine.
 *
 * Returns 0 if found and skipped, 1 otherwise (cursor untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int second, third;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[2] is only inspected when cur[1] is a digit, so the string
     * terminator is never read past.
     */
    second = (cur[1] >= '0') && (cur[1] <= '9');
    third = second && (cur[2] >= '0') && (cur[2] <= '9');

    if (!second) {
        cur += 1;                               /* 0-9 */
    } else if (!third) {
        if (cur[0] == '0')                      /* no leading zero */
            return(1);
        cur += 2;                               /* 10-99 */
    } else if (cur[0] == '1') {
        cur += 3;                               /* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        cur += 3;                               /* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* the upper bound applies to the THIRD digit: 250-255 only */
        cur += 3;
    } else {
        return(1);
    }

    *str = cur;
    return(0);
}
441 /**
442  * xmlParse3986Host:
443  * @uri:  pointer to an URI structure
444  * @str:  the string to analyze
445  *
446  * Parse an host part and fills in the appropriate fields
447  * of the @uri structure
448  *
449  * host          = IP-literal / IPv4address / reg-name
450  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
451  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
452  * reg-name      = *( unreserved / pct-encoded / sub-delims )
453  *
454  * Returns 0 or the error code
455  */
456 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)457 xmlParse3986Host(xmlURIPtr uri, const char **str)
458 {
459     const char *cur = *str;
460     const char *host;
461 
462     host = cur;
463     /*
464      * IPv6 and future addressing scheme are enclosed between brackets
465      */
466     if (*cur == '[') {
467         cur++;
468 	while ((*cur != ']') && (*cur != 0))
469 	    cur++;
470 	if (*cur != ']')
471 	    return(1);
472 	cur++;
473 	goto found;
474     }
475     /*
476      * try to parse an IPv4
477      */
478     if (ISA_DIGIT(cur)) {
479         if (xmlParse3986DecOctet(&cur) != 0)
480 	    goto not_ipv4;
481 	if (*cur != '.')
482 	    goto not_ipv4;
483 	cur++;
484         if (xmlParse3986DecOctet(&cur) != 0)
485 	    goto not_ipv4;
486 	if (*cur != '.')
487 	    goto not_ipv4;
488         if (xmlParse3986DecOctet(&cur) != 0)
489 	    goto not_ipv4;
490 	if (*cur != '.')
491 	    goto not_ipv4;
492         if (xmlParse3986DecOctet(&cur) != 0)
493 	    goto not_ipv4;
494 	goto found;
495 not_ipv4:
496         cur = *str;
497     }
498     /*
499      * then this should be a hostname which can be empty
500      */
501     while (ISA_UNRESERVED(uri, cur) ||
502            ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
503         NEXT(cur);
504 found:
505     if (uri != NULL) {
506 	if (uri->authority != NULL) xmlFree(uri->authority);
507 	uri->authority = NULL;
508 	if (uri->server != NULL) xmlFree(uri->server);
509 	if (cur != host) {
510 	    if (uri->cleanup & XML_URI_NO_UNESCAPE)
511 		uri->server = STRNDUP(host, cur - host);
512 	    else
513 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
514             if (uri->server == NULL)
515                 return(-1);
516 	} else
517 	    uri->server = NULL;
518     }
519     *str = cur;
520     return(0);
521 }
522 
523 /**
524  * xmlParse3986Authority:
525  * @uri:  pointer to an URI structure
526  * @str:  the string to analyze
527  *
528  * Parse an authority part and fills in the appropriate fields
529  * of the @uri structure
530  *
531  * authority     = [ userinfo "@" ] host [ ":" port ]
532  *
533  * Returns 0 or the error code
534  */
535 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)536 xmlParse3986Authority(xmlURIPtr uri, const char **str)
537 {
538     const char *cur;
539     int ret;
540 
541     cur = *str;
542     /*
543      * try to parse an userinfo and check for the trailing @
544      */
545     ret = xmlParse3986Userinfo(uri, &cur);
546     if (ret < 0)
547         return(ret);
548     if ((ret != 0) || (*cur != '@'))
549         cur = *str;
550     else
551         cur++;
552     ret = xmlParse3986Host(uri, &cur);
553     if (ret != 0) return(ret);
554     if (*cur == ':') {
555         cur++;
556         ret = xmlParse3986Port(uri, &cur);
557 	if (ret != 0) return(ret);
558     }
559     *str = cur;
560     return(0);
561 }
562 
563 /**
564  * xmlParse3986Segment:
565  * @str:  the string to analyze
566  * @forbid: an optional forbidden character
567  * @empty: allow an empty segment
568  *
569  * Parse a segment and fills in the appropriate fields
570  * of the @uri structure
571  *
572  * segment       = *pchar
573  * segment-nz    = 1*pchar
574  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
575  *               ; non-zero-length segment without any colon ":"
576  *
577  * Returns 0 or the error code
578  */
579 static int
xmlParse3986Segment(xmlURIPtr uri,const char ** str,char forbid,int empty)580 xmlParse3986Segment(xmlURIPtr uri, const char **str, char forbid, int empty)
581 {
582     const char *cur;
583 
584     cur = *str;
585     if (!ISA_PCHAR(uri, cur)) {
586         if (empty)
587 	    return(0);
588 	return(1);
589     }
590     while (ISA_PCHAR(uri, cur) && (*cur != forbid))
591         NEXT(cur);
592     *str = cur;
593     return (0);
594 }
595 
596 /**
597  * xmlParse3986PathAbEmpty:
598  * @uri:  pointer to an URI structure
599  * @str:  the string to analyze
600  *
601  * Parse an path absolute or empty and fills in the appropriate fields
602  * of the @uri structure
603  *
604  * path-abempty  = *( "/" segment )
605  *
606  * Returns 0 or the error code
607  */
608 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)609 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
610 {
611     const char *cur;
612     int ret;
613 
614     cur = *str;
615 
616     while (*cur == '/') {
617         cur++;
618 	ret = xmlParse3986Segment(uri, &cur, 0, 1);
619 	if (ret != 0) return(ret);
620     }
621     if (uri != NULL) {
622 	if (uri->path != NULL) xmlFree(uri->path);
623         if (*str != cur) {
624             if (uri->cleanup & XML_URI_NO_UNESCAPE)
625                 uri->path = STRNDUP(*str, cur - *str);
626             else
627                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
628             if (uri->path == NULL)
629                 return (-1);
630         } else {
631             uri->path = NULL;
632         }
633     }
634     *str = cur;
635     return (0);
636 }
637 
638 /**
639  * xmlParse3986PathAbsolute:
640  * @uri:  pointer to an URI structure
641  * @str:  the string to analyze
642  *
643  * Parse an path absolute and fills in the appropriate fields
644  * of the @uri structure
645  *
646  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
647  *
648  * Returns 0 or the error code
649  */
650 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)651 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
652 {
653     const char *cur;
654     int ret;
655 
656     cur = *str;
657 
658     if (*cur != '/')
659         return(1);
660     cur++;
661     ret = xmlParse3986Segment(uri, &cur, 0, 0);
662     if (ret == 0) {
663 	while (*cur == '/') {
664 	    cur++;
665 	    ret = xmlParse3986Segment(uri, &cur, 0, 1);
666 	    if (ret != 0) return(ret);
667 	}
668     }
669     if (uri != NULL) {
670 	if (uri->path != NULL) xmlFree(uri->path);
671         if (cur != *str) {
672             if (uri->cleanup & XML_URI_NO_UNESCAPE)
673                 uri->path = STRNDUP(*str, cur - *str);
674             else
675                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
676             if (uri->path == NULL)
677                 return (-1);
678         } else {
679             uri->path = NULL;
680         }
681     }
682     *str = cur;
683     return (0);
684 }
685 
686 /**
687  * xmlParse3986PathRootless:
688  * @uri:  pointer to an URI structure
689  * @str:  the string to analyze
690  *
691  * Parse an path without root and fills in the appropriate fields
692  * of the @uri structure
693  *
694  * path-rootless = segment-nz *( "/" segment )
695  *
696  * Returns 0 or the error code
697  */
698 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)699 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
700 {
701     const char *cur;
702     int ret;
703 
704     cur = *str;
705 
706     ret = xmlParse3986Segment(uri, &cur, 0, 0);
707     if (ret != 0) return(ret);
708     while (*cur == '/') {
709         cur++;
710 	ret = xmlParse3986Segment(uri, &cur, 0, 1);
711 	if (ret != 0) return(ret);
712     }
713     if (uri != NULL) {
714 	if (uri->path != NULL) xmlFree(uri->path);
715         if (cur != *str) {
716             if (uri->cleanup & XML_URI_NO_UNESCAPE)
717                 uri->path = STRNDUP(*str, cur - *str);
718             else
719                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
720             if (uri->path == NULL)
721                 return (-1);
722         } else {
723             uri->path = NULL;
724         }
725     }
726     *str = cur;
727     return (0);
728 }
729 
730 /**
731  * xmlParse3986PathNoScheme:
732  * @uri:  pointer to an URI structure
733  * @str:  the string to analyze
734  *
735  * Parse an path which is not a scheme and fills in the appropriate fields
736  * of the @uri structure
737  *
738  * path-noscheme = segment-nz-nc *( "/" segment )
739  *
740  * Returns 0 or the error code
741  */
742 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)743 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
744 {
745     const char *cur;
746     int ret;
747 
748     cur = *str;
749 
750     ret = xmlParse3986Segment(uri, &cur, ':', 0);
751     if (ret != 0) return(ret);
752     while (*cur == '/') {
753         cur++;
754 	ret = xmlParse3986Segment(uri, &cur, 0, 1);
755 	if (ret != 0) return(ret);
756     }
757     if (uri != NULL) {
758 	if (uri->path != NULL) xmlFree(uri->path);
759         if (cur != *str) {
760             if (uri->cleanup & XML_URI_NO_UNESCAPE)
761                 uri->path = STRNDUP(*str, cur - *str);
762             else
763                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
764             if (uri->path == NULL)
765                 return (-1);
766         } else {
767             uri->path = NULL;
768         }
769     }
770     *str = cur;
771     return (0);
772 }
773 
774 /**
775  * xmlParse3986HierPart:
776  * @uri:  pointer to an URI structure
777  * @str:  the string to analyze
778  *
779  * Parse an hierarchical part and fills in the appropriate fields
780  * of the @uri structure
781  *
782  * hier-part     = "//" authority path-abempty
783  *                / path-absolute
784  *                / path-rootless
785  *                / path-empty
786  *
787  * Returns 0 or the error code
788  */
789 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)790 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
791 {
792     const char *cur;
793     int ret;
794 
795     cur = *str;
796 
797     if ((*cur == '/') && (*(cur + 1) == '/')) {
798         cur += 2;
799 	ret = xmlParse3986Authority(uri, &cur);
800 	if (ret != 0) return(ret);
801         /*
802          * An empty server is marked with a special URI value.
803          */
804 	if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
805 	    uri->port = PORT_EMPTY_SERVER;
806 	ret = xmlParse3986PathAbEmpty(uri, &cur);
807 	if (ret != 0) return(ret);
808 	*str = cur;
809 	return(0);
810     } else if (*cur == '/') {
811         ret = xmlParse3986PathAbsolute(uri, &cur);
812 	if (ret != 0) return(ret);
813     } else if (ISA_PCHAR(uri, cur)) {
814         ret = xmlParse3986PathRootless(uri, &cur);
815 	if (ret != 0) return(ret);
816     } else {
817 	/* path-empty is effectively empty */
818 	if (uri != NULL) {
819 	    if (uri->path != NULL) xmlFree(uri->path);
820 	    uri->path = NULL;
821 	}
822     }
823     *str = cur;
824     return (0);
825 }
826 
827 /**
828  * xmlParse3986RelativeRef:
829  * @uri:  pointer to an URI structure
830  * @str:  the string to analyze
831  *
832  * Parse an URI string and fills in the appropriate fields
833  * of the @uri structure
834  *
835  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
836  * relative-part = "//" authority path-abempty
837  *               / path-absolute
838  *               / path-noscheme
839  *               / path-empty
840  *
841  * Returns 0 or the error code
842  */
843 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)844 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
845     int ret;
846 
847     if ((*str == '/') && (*(str + 1) == '/')) {
848         str += 2;
849 	ret = xmlParse3986Authority(uri, &str);
850 	if (ret != 0) return(ret);
851 	ret = xmlParse3986PathAbEmpty(uri, &str);
852 	if (ret != 0) return(ret);
853     } else if (*str == '/') {
854 	ret = xmlParse3986PathAbsolute(uri, &str);
855 	if (ret != 0) return(ret);
856     } else if (ISA_PCHAR(uri, str)) {
857         ret = xmlParse3986PathNoScheme(uri, &str);
858 	if (ret != 0) return(ret);
859     } else {
860 	/* path-empty is effectively empty */
861 	if (uri != NULL) {
862 	    if (uri->path != NULL) xmlFree(uri->path);
863 	    uri->path = NULL;
864 	}
865     }
866 
867     if (*str == '?') {
868 	str++;
869 	ret = xmlParse3986Query(uri, &str);
870 	if (ret != 0) return(ret);
871     }
872     if (*str == '#') {
873 	str++;
874 	ret = xmlParse3986Fragment(uri, &str);
875 	if (ret != 0) return(ret);
876     }
877     if (*str != 0) {
878 	xmlCleanURI(uri);
879 	return(1);
880     }
881     return(0);
882 }
883 
884 
885 /**
886  * xmlParse3986URI:
887  * @uri:  pointer to an URI structure
888  * @str:  the string to analyze
889  *
890  * Parse an URI string and fills in the appropriate fields
891  * of the @uri structure
892  *
893  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
894  *
895  * Returns 0 or the error code
896  */
897 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)898 xmlParse3986URI(xmlURIPtr uri, const char *str) {
899     int ret;
900 
901     ret = xmlParse3986Scheme(uri, &str);
902     if (ret != 0) return(ret);
903     if (*str != ':') {
904 	return(1);
905     }
906     str++;
907     ret = xmlParse3986HierPart(uri, &str);
908     if (ret != 0) return(ret);
909     if (*str == '?') {
910 	str++;
911 	ret = xmlParse3986Query(uri, &str);
912 	if (ret != 0) return(ret);
913     }
914     if (*str == '#') {
915 	str++;
916 	ret = xmlParse3986Fragment(uri, &str);
917 	if (ret != 0) return(ret);
918     }
919     if (*str != 0) {
920 	xmlCleanURI(uri);
921 	return(1);
922     }
923     return(0);
924 }
925 
926 /**
927  * xmlParse3986URIReference:
928  * @uri:  pointer to an URI structure
929  * @str:  the string to analyze
930  *
931  * Parse an URI reference string and fills in the appropriate fields
932  * of the @uri structure
933  *
934  * URI-reference = URI / relative-ref
935  *
936  * Returns 0 or the error code
937  */
938 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)939 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
940     int ret;
941 
942     if (str == NULL)
943 	return(-1);
944     xmlCleanURI(uri);
945 
946     /*
947      * Try first to parse absolute refs, then fallback to relative if
948      * it fails.
949      */
950     ret = xmlParse3986URI(uri, str);
951     if (ret < 0)
952         return(ret);
953     if (ret != 0) {
954 	xmlCleanURI(uri);
955         ret = xmlParse3986RelativeRef(uri, str);
956 	if (ret != 0) {
957 	    xmlCleanURI(uri);
958 	    return(ret);
959 	}
960     }
961     return(0);
962 }
963 
964 /**
965  * xmlParseURISafe:
966  * @str:  the URI string to analyze
967  * @uriOut:  optional pointer to parsed URI
968  *
969  * Parse an URI based on RFC 3986
970  *
971  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
972  *
973  * Available since 2.13.0.
974  *
975  * Returns 0 on success, an error code (typically 1) if the URI is invalid
976  * or -1 if a memory allocation failed.
977  */
978 int
xmlParseURISafe(const char * str,xmlURIPtr * uriOut)979 xmlParseURISafe(const char *str, xmlURIPtr *uriOut) {
980     xmlURIPtr uri;
981     int ret;
982 
983     if (uriOut == NULL)
984         return(1);
985     *uriOut = NULL;
986     if (str == NULL)
987 	return(1);
988 
989     uri = xmlCreateURI();
990     if (uri == NULL)
991         return(-1);
992 
993     ret = xmlParse3986URIReference(uri, str);
994     if (ret) {
995         xmlFreeURI(uri);
996         return(ret);
997     }
998 
999     *uriOut = uri;
1000     return(0);
1001 }
1002 
1003 /**
1004  * xmlParseURI:
1005  * @str:  the URI string to analyze
1006  *
1007  * Parse an URI based on RFC 3986
1008  *
1009  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1010  *
1011  * Returns a newly built xmlURIPtr or NULL in case of error
1012  */
1013 xmlURIPtr
xmlParseURI(const char * str)1014 xmlParseURI(const char *str) {
1015     xmlURIPtr uri;
1016     xmlParseURISafe(str, &uri);
1017     return(uri);
1018 }
1019 
1020 /**
1021  * xmlParseURIReference:
1022  * @uri:  pointer to an URI structure
1023  * @str:  the string to analyze
1024  *
1025  * Parse an URI reference string based on RFC 3986 and fills in the
1026  * appropriate fields of the @uri structure
1027  *
1028  * URI-reference = URI / relative-ref
1029  *
1030  * Returns 0 or the error code
1031  */
1032 int
xmlParseURIReference(xmlURIPtr uri,const char * str)1033 xmlParseURIReference(xmlURIPtr uri, const char *str) {
1034     return(xmlParse3986URIReference(uri, str));
1035 }
1036 
1037 /**
1038  * xmlParseURIRaw:
1039  * @str:  the URI string to analyze
1040  * @raw:  if 1 unescaping of URI pieces are disabled
1041  *
1042  * Parse an URI but allows to keep intact the original fragments.
1043  *
1044  * URI-reference = URI / relative-ref
1045  *
1046  * Returns a newly built xmlURIPtr or NULL in case of error
1047  */
1048 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)1049 xmlParseURIRaw(const char *str, int raw) {
1050     xmlURIPtr uri;
1051     int ret;
1052 
1053     if (str == NULL)
1054 	return(NULL);
1055     uri = xmlCreateURI();
1056     if (uri != NULL) {
1057         if (raw) {
1058 	    uri->cleanup |= XML_URI_NO_UNESCAPE;
1059 	}
1060 	ret = xmlParseURIReference(uri, str);
1061         if (ret) {
1062 	    xmlFreeURI(uri);
1063 	    return(NULL);
1064 	}
1065     }
1066     return(uri);
1067 }
1068 
1069 /************************************************************************
1070  *									*
1071  *			Generic URI structure functions			*
1072  *									*
1073  ************************************************************************/
1074 
1075 /**
1076  * xmlCreateURI:
1077  *
1078  * Simply creates an empty xmlURI
1079  *
1080  * Returns the new structure or NULL in case of error
1081  */
1082 xmlURIPtr
xmlCreateURI(void)1083 xmlCreateURI(void) {
1084     xmlURIPtr ret;
1085 
1086     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1087     if (ret == NULL)
1088 	return(NULL);
1089     memset(ret, 0, sizeof(xmlURI));
1090     ret->port = PORT_EMPTY;
1091     return(ret);
1092 }
1093 
1094 /**
1095  * xmlSaveUriRealloc:
1096  *
1097  * Function to handle properly a reallocation when saving an URI
1098  * Also imposes some limit on the length of an URI string output
1099  */
1100 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1101 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1102     xmlChar *temp;
1103     int tmp;
1104 
1105     if (*max > MAX_URI_LENGTH)
1106         return(NULL);
1107     tmp = *max * 2;
1108     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1109     if (temp == NULL)
1110         return(NULL);
1111     *max = tmp;
1112     return(temp);
1113 }
1114 
1115 /**
1116  * xmlSaveUri:
1117  * @uri:  pointer to an xmlURI
1118  *
1119  * Save the URI as an escaped string
1120  *
1121  * Returns a new string (to be deallocated by caller)
1122  */
1123 xmlChar *
xmlSaveUri(xmlURIPtr uri)1124 xmlSaveUri(xmlURIPtr uri) {
1125     xmlChar *ret = NULL;
1126     xmlChar *temp;
1127     const char *p;
1128     int len;
1129     int max;
1130 
1131     if (uri == NULL) return(NULL);
1132 
1133 
1134     max = 80;
1135     ret = xmlMalloc(max + 1);
1136     if (ret == NULL)
1137 	return(NULL);
1138     len = 0;
1139 
1140     if (uri->scheme != NULL) {
1141 	p = uri->scheme;
1142 	while (*p != 0) {
1143 	    if (len >= max) {
1144                 temp = xmlSaveUriRealloc(ret, &max);
1145                 if (temp == NULL) goto mem_error;
1146 		ret = temp;
1147 	    }
1148 	    ret[len++] = *p++;
1149 	}
1150 	if (len >= max) {
1151             temp = xmlSaveUriRealloc(ret, &max);
1152             if (temp == NULL) goto mem_error;
1153             ret = temp;
1154 	}
1155 	ret[len++] = ':';
1156     }
1157     if (uri->opaque != NULL) {
1158 	p = uri->opaque;
1159 	while (*p != 0) {
1160 	    if (len + 3 >= max) {
1161                 temp = xmlSaveUriRealloc(ret, &max);
1162                 if (temp == NULL) goto mem_error;
1163                 ret = temp;
1164 	    }
1165 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1166 		ret[len++] = *p++;
1167 	    else {
1168 		int val = *(unsigned char *)p++;
1169 		int hi = val / 0x10, lo = val % 0x10;
1170 		ret[len++] = '%';
1171 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1172 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1173 	    }
1174 	}
1175     } else {
1176 	if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1177 	    if (len + 3 >= max) {
1178                 temp = xmlSaveUriRealloc(ret, &max);
1179                 if (temp == NULL) goto mem_error;
1180                 ret = temp;
1181 	    }
1182 	    ret[len++] = '/';
1183 	    ret[len++] = '/';
1184 	    if (uri->user != NULL) {
1185 		p = uri->user;
1186 		while (*p != 0) {
1187 		    if (len + 3 >= max) {
1188                         temp = xmlSaveUriRealloc(ret, &max);
1189                         if (temp == NULL) goto mem_error;
1190                         ret = temp;
1191 		    }
1192 		    if ((IS_UNRESERVED(*(p))) ||
1193 			((*(p) == ';')) || ((*(p) == ':')) ||
1194 			((*(p) == '&')) || ((*(p) == '=')) ||
1195 			((*(p) == '+')) || ((*(p) == '$')) ||
1196 			((*(p) == ',')))
1197 			ret[len++] = *p++;
1198 		    else {
1199 			int val = *(unsigned char *)p++;
1200 			int hi = val / 0x10, lo = val % 0x10;
1201 			ret[len++] = '%';
1202 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1203 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1204 		    }
1205 		}
1206 		if (len + 3 >= max) {
1207                     temp = xmlSaveUriRealloc(ret, &max);
1208                     if (temp == NULL) goto mem_error;
1209                     ret = temp;
1210 		}
1211 		ret[len++] = '@';
1212 	    }
1213 	    if (uri->server != NULL) {
1214 		p = uri->server;
1215 		while (*p != 0) {
1216 		    if (len >= max) {
1217 			temp = xmlSaveUriRealloc(ret, &max);
1218 			if (temp == NULL) goto mem_error;
1219 			ret = temp;
1220 		    }
1221                     /* TODO: escaping? */
1222 		    ret[len++] = (xmlChar) *p++;
1223 		}
1224 	    }
1225             if (uri->port > 0) {
1226                 if (len + 10 >= max) {
1227                     temp = xmlSaveUriRealloc(ret, &max);
1228                     if (temp == NULL) goto mem_error;
1229                     ret = temp;
1230                 }
1231                 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1232             }
1233 	} else if (uri->authority != NULL) {
1234 	    if (len + 3 >= max) {
1235                 temp = xmlSaveUriRealloc(ret, &max);
1236                 if (temp == NULL) goto mem_error;
1237                 ret = temp;
1238 	    }
1239 	    ret[len++] = '/';
1240 	    ret[len++] = '/';
1241 	    p = uri->authority;
1242 	    while (*p != 0) {
1243 		if (len + 3 >= max) {
1244                     temp = xmlSaveUriRealloc(ret, &max);
1245                     if (temp == NULL) goto mem_error;
1246                     ret = temp;
1247 		}
1248 		if ((IS_UNRESERVED(*(p))) ||
1249                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1250                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1251                     ((*(p) == '=')) || ((*(p) == '+')))
1252 		    ret[len++] = *p++;
1253 		else {
1254 		    int val = *(unsigned char *)p++;
1255 		    int hi = val / 0x10, lo = val % 0x10;
1256 		    ret[len++] = '%';
1257 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1258 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1259 		}
1260 	    }
1261 	} else if (uri->scheme != NULL) {
1262 	    if (len + 3 >= max) {
1263                 temp = xmlSaveUriRealloc(ret, &max);
1264                 if (temp == NULL) goto mem_error;
1265                 ret = temp;
1266 	    }
1267 	}
1268 	if (uri->path != NULL) {
1269 	    p = uri->path;
1270 	    /*
1271 	     * the colon in file:///d: should not be escaped or
1272 	     * Windows accesses fail later.
1273 	     */
1274 	    if ((uri->scheme != NULL) &&
1275 		(p[0] == '/') &&
1276 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1277 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1278 		(p[2] == ':') &&
1279 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1280 		if (len + 3 >= max) {
1281                     temp = xmlSaveUriRealloc(ret, &max);
1282                     if (temp == NULL) goto mem_error;
1283                     ret = temp;
1284 		}
1285 		ret[len++] = *p++;
1286 		ret[len++] = *p++;
1287 		ret[len++] = *p++;
1288 	    }
1289 	    while (*p != 0) {
1290 		if (len + 3 >= max) {
1291                     temp = xmlSaveUriRealloc(ret, &max);
1292                     if (temp == NULL) goto mem_error;
1293                     ret = temp;
1294 		}
1295 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1296                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1297 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1298 	            ((*(p) == ',')))
1299 		    ret[len++] = *p++;
1300 		else {
1301 		    int val = *(unsigned char *)p++;
1302 		    int hi = val / 0x10, lo = val % 0x10;
1303 		    ret[len++] = '%';
1304 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1305 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1306 		}
1307 	    }
1308 	}
1309 	if (uri->query_raw != NULL) {
1310 	    if (len + 1 >= max) {
1311                 temp = xmlSaveUriRealloc(ret, &max);
1312                 if (temp == NULL) goto mem_error;
1313                 ret = temp;
1314 	    }
1315 	    ret[len++] = '?';
1316 	    p = uri->query_raw;
1317 	    while (*p != 0) {
1318 		if (len + 1 >= max) {
1319                     temp = xmlSaveUriRealloc(ret, &max);
1320                     if (temp == NULL) goto mem_error;
1321                     ret = temp;
1322 		}
1323 		ret[len++] = *p++;
1324 	    }
1325 	} else if (uri->query != NULL) {
1326 	    if (len + 3 >= max) {
1327                 temp = xmlSaveUriRealloc(ret, &max);
1328                 if (temp == NULL) goto mem_error;
1329                 ret = temp;
1330 	    }
1331 	    ret[len++] = '?';
1332 	    p = uri->query;
1333 	    while (*p != 0) {
1334 		if (len + 3 >= max) {
1335                     temp = xmlSaveUriRealloc(ret, &max);
1336                     if (temp == NULL) goto mem_error;
1337                     ret = temp;
1338 		}
1339 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1340 		    ret[len++] = *p++;
1341 		else {
1342 		    int val = *(unsigned char *)p++;
1343 		    int hi = val / 0x10, lo = val % 0x10;
1344 		    ret[len++] = '%';
1345 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1346 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1347 		}
1348 	    }
1349 	}
1350     }
1351     if (uri->fragment != NULL) {
1352 	if (len + 3 >= max) {
1353             temp = xmlSaveUriRealloc(ret, &max);
1354             if (temp == NULL) goto mem_error;
1355             ret = temp;
1356 	}
1357 	ret[len++] = '#';
1358 	p = uri->fragment;
1359 	while (*p != 0) {
1360 	    if (len + 3 >= max) {
1361                 temp = xmlSaveUriRealloc(ret, &max);
1362                 if (temp == NULL) goto mem_error;
1363                 ret = temp;
1364 	    }
1365 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1366 		ret[len++] = *p++;
1367 	    else {
1368 		int val = *(unsigned char *)p++;
1369 		int hi = val / 0x10, lo = val % 0x10;
1370 		ret[len++] = '%';
1371 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1372 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1373 	    }
1374 	}
1375     }
1376     if (len >= max) {
1377         temp = xmlSaveUriRealloc(ret, &max);
1378         if (temp == NULL) goto mem_error;
1379         ret = temp;
1380     }
1381     ret[len] = 0;
1382     return(ret);
1383 
1384 mem_error:
1385     xmlFree(ret);
1386     return(NULL);
1387 }
1388 
1389 /**
1390  * xmlPrintURI:
1391  * @stream:  a FILE* for the output
1392  * @uri:  pointer to an xmlURI
1393  *
1394  * Prints the URI in the stream @stream.
1395  */
1396 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1397 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1398     xmlChar *out;
1399 
1400     out = xmlSaveUri(uri);
1401     if (out != NULL) {
1402 	fprintf(stream, "%s", (char *) out);
1403 	xmlFree(out);
1404     }
1405 }
1406 
1407 /**
1408  * xmlCleanURI:
1409  * @uri:  pointer to an xmlURI
1410  *
1411  * Make sure the xmlURI struct is free of content
1412  */
1413 static void
xmlCleanURI(xmlURIPtr uri)1414 xmlCleanURI(xmlURIPtr uri) {
1415     if (uri == NULL) return;
1416 
1417     if (uri->scheme != NULL) xmlFree(uri->scheme);
1418     uri->scheme = NULL;
1419     if (uri->server != NULL) xmlFree(uri->server);
1420     uri->server = NULL;
1421     if (uri->user != NULL) xmlFree(uri->user);
1422     uri->user = NULL;
1423     if (uri->path != NULL) xmlFree(uri->path);
1424     uri->path = NULL;
1425     if (uri->fragment != NULL) xmlFree(uri->fragment);
1426     uri->fragment = NULL;
1427     if (uri->opaque != NULL) xmlFree(uri->opaque);
1428     uri->opaque = NULL;
1429     if (uri->authority != NULL) xmlFree(uri->authority);
1430     uri->authority = NULL;
1431     if (uri->query != NULL) xmlFree(uri->query);
1432     uri->query = NULL;
1433     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1434     uri->query_raw = NULL;
1435 }
1436 
1437 /**
1438  * xmlFreeURI:
1439  * @uri:  pointer to an xmlURI
1440  *
1441  * Free up the xmlURI struct
1442  */
1443 void
xmlFreeURI(xmlURIPtr uri)1444 xmlFreeURI(xmlURIPtr uri) {
1445     if (uri == NULL) return;
1446 
1447     if (uri->scheme != NULL) xmlFree(uri->scheme);
1448     if (uri->server != NULL) xmlFree(uri->server);
1449     if (uri->user != NULL) xmlFree(uri->user);
1450     if (uri->path != NULL) xmlFree(uri->path);
1451     if (uri->fragment != NULL) xmlFree(uri->fragment);
1452     if (uri->opaque != NULL) xmlFree(uri->opaque);
1453     if (uri->authority != NULL) xmlFree(uri->authority);
1454     if (uri->query != NULL) xmlFree(uri->query);
1455     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1456     xmlFree(uri);
1457 }
1458 
/************************************************************************
 *									*
 *			Helper functions				*
 *									*
 ************************************************************************/
1464 
/*
 * xmlIsPathSeparator:
 * @c:  the character to test
 * @isFile:  true for filesystem paths, false for URIs
 *
 * '/' is always a separator. When built for Windows or Cygwin, '\\' is
 * one as well, but only for filesystem paths.
 *
 * Returns 1 if @c separates path segments, 0 otherwise
 */
static int
xmlIsPathSeparator(int c, int isFile) {
#if defined(_WIN32) || defined(__CYGWIN__)
    return((c == '/') || (isFile && (c == '\\')));
#else
    /* isFile only matters on Windows builds */
    (void) isFile;
    return(c == '/');
#endif
}
1479 
1480 /**
1481  * xmlNormalizePath:
1482  * @path:  pointer to the path string
1483  * @isFile:  true for filesystem paths, false for URIs
1484  *
1485  * Normalize a filesystem path or URI.
1486  *
1487  * Returns 0 or an error code
1488  */
1489 static int
xmlNormalizePath(char * path,int isFile)1490 xmlNormalizePath(char *path, int isFile) {
1491     char *cur, *out;
1492     int numSeg = 0;
1493 
1494     if (path == NULL)
1495 	return(-1);
1496 
1497     cur = path;
1498     out = path;
1499 
1500     if (*cur == 0)
1501         return(0);
1502 
1503     if (xmlIsPathSeparator(*cur, isFile)) {
1504         cur++;
1505         *out++ = '/';
1506     }
1507 
1508     while (*cur != 0) {
1509         /*
1510          * At this point, out is either empty or ends with a separator.
1511          * Collapse multiple separators first.
1512          */
1513         while (xmlIsPathSeparator(*cur, isFile)) {
1514 #if defined(_WIN32) || defined(__CYGWIN__)
1515             /* Allow two separators at start of path */
1516             if ((isFile) && (out == path + 1))
1517                 *out++ = '/';
1518 #endif
1519             cur++;
1520         }
1521 
1522         if (*cur == '.') {
1523             if (cur[1] == 0) {
1524                 /* Ignore "." at end of path */
1525                 break;
1526             } else if (xmlIsPathSeparator(cur[1], isFile)) {
1527                 /* Skip "./" */
1528                 cur += 2;
1529                 continue;
1530             } else if ((cur[1] == '.') &&
1531                        ((cur[2] == 0) || xmlIsPathSeparator(cur[2], isFile))) {
1532                 if (numSeg > 0) {
1533                     /* Handle ".." by removing last segment */
1534                     do {
1535                         out--;
1536                     } while ((out > path) &&
1537                              !xmlIsPathSeparator(out[-1], isFile));
1538                     numSeg--;
1539 
1540                     if (cur[2] == 0)
1541                         break;
1542                     cur += 3;
1543                     continue;
1544                 } else if (out[0] == '/') {
1545                     /* Ignore extraneous ".." in absolute paths */
1546                     if (cur[2] == 0)
1547                         break;
1548                     cur += 3;
1549                     continue;
1550                 } else {
1551                     /* Keep "../" at start of relative path */
1552                     numSeg--;
1553                 }
1554             }
1555         }
1556 
1557         /* Copy segment */
1558         while ((*cur != 0) && !xmlIsPathSeparator(*cur, isFile)) {
1559             *out++ = *cur++;
1560         }
1561 
1562         /* Copy separator */
1563         if (*cur != 0) {
1564             cur++;
1565             *out++ = '/';
1566         }
1567 
1568         numSeg++;
1569     }
1570 
1571     /* Keep "." if output is empty and it's a file */
1572     if ((isFile) && (out <= path))
1573         *out++ = '.';
1574     *out = 0;
1575 
1576     return(0);
1577 }
1578 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Normalizes the path of an URI by calling xmlNormalizePath with the
 * filesystem mode switched off.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0 or an error code
 */
int
xmlNormalizeURIPath(char *path) {
    int status = xmlNormalizePath(path, 0);

    return(status);
}
1594 
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise
 */
static int is_hex(char c) {
    int digit = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return(digit || lower || upper);
}
1602 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Copies @str while replacing every '%' that is followed by two
 * hexadecimal digits with the single byte they encode. Any other byte,
 * including a '%' that does not start a complete escape, is copied
 * unchanged. No check is made that the string is an URI, and the
 * decoded bytes are not interpreted in any encoding.
 * The output is never longer than the input. When @target is given the
 * result is written there, otherwise a buffer of @len + 1 bytes is
 * allocated.
 *
 * Returns the zero terminated result (@target or the new buffer), or
 * NULL if @str is NULL or the allocation failed
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *buffer, *dst;
    const char *src;

    if (str == NULL)
	return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    buffer = target;
    if (buffer == NULL) {
        buffer = xmlMalloc(len + 1);
        if (buffer == NULL)
            return(NULL);
    }

    src = str;
    dst = buffer;
    while (len > 0) {
        /* An escape needs three input bytes: '%' and two hex digits */
        if ((len > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int k;

            for (k = 1; k <= 2; k++) {
                char digit = src[k];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value += (digit - 'A') + 10;
            }
            src += 3;
            len -= 3;
            /* Explicit sign change */
            *dst++ = (char) value;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(buffer);
}
1664 
1665 /**
1666  * xmlURIEscapeStr:
1667  * @str:  string to escape
1668  * @list: exception list string of chars not to escape
1669  *
1670  * This routine escapes a string to hex, ignoring unreserved characters
1671  * a-z, A-Z, 0-9, "-._~", a few sub-delims "!*'()", the gen-delim "@"
1672  * (why?) and the characters in the exception list.
1673  *
1674  * Returns a new escaped string or NULL in case of error.
1675  */
1676 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1677 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1678     xmlChar *ret, ch;
1679     xmlChar *temp;
1680     const xmlChar *in;
1681     int len, out;
1682 
1683     if (str == NULL)
1684 	return(NULL);
1685     if (str[0] == 0)
1686 	return(xmlStrdup(str));
1687     len = xmlStrlen(str);
1688 
1689     len += 20;
1690     ret = xmlMalloc(len);
1691     if (ret == NULL)
1692 	return(NULL);
1693     in = (const xmlChar *) str;
1694     out = 0;
1695     while(*in != 0) {
1696 	if (len - out <= 3) {
1697             if (len > INT_MAX / 2)
1698                 return(NULL);
1699             temp = xmlRealloc(ret, len * 2);
1700 	    if (temp == NULL) {
1701 		xmlFree(ret);
1702 		return(NULL);
1703 	    }
1704 	    ret = temp;
1705             len *= 2;
1706 	}
1707 
1708 	ch = *in;
1709 
1710 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1711 	    unsigned char val;
1712 	    ret[out++] = '%';
1713 	    val = ch >> 4;
1714 	    if (val <= 9)
1715 		ret[out++] = '0' + val;
1716 	    else
1717 		ret[out++] = 'A' + val - 0xA;
1718 	    val = ch & 0xF;
1719 	    if (val <= 9)
1720 		ret[out++] = '0' + val;
1721 	    else
1722 		ret[out++] = 'A' + val - 0xA;
1723 	    in++;
1724 	} else {
1725 	    ret[out++] = *in++;
1726 	}
1727 
1728     }
1729     ret[out] = 0;
1730     return(ret);
1731 }
1732 
1733 /**
1734  * xmlURIEscape:
1735  * @str:  the string of the URI to escape
1736  *
1737  * Escaping routine, does not do validity checks !
1738  * It will try to escape the chars needing this, but this is heuristic
1739  * based it's impossible to be sure.
1740  *
1741  * Returns an copy of the string, but escaped
1742  *
1743  * 25 May 2001
1744  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1745  * according to RFC2396.
1746  *   - Carl Douglas
1747  */
1748 xmlChar *
xmlURIEscape(const xmlChar * str)1749 xmlURIEscape(const xmlChar * str)
1750 {
1751     xmlChar *ret, *segment = NULL;
1752     xmlURIPtr uri;
1753     int ret2;
1754 
1755     if (str == NULL)
1756         return (NULL);
1757 
1758     uri = xmlCreateURI();
1759     if (uri != NULL) {
1760 	/*
1761 	 * Allow escaping errors in the unescaped form
1762 	 */
1763         uri->cleanup = XML_URI_ALLOW_UNWISE;
1764         ret2 = xmlParseURIReference(uri, (const char *)str);
1765         if (ret2) {
1766             xmlFreeURI(uri);
1767             return (NULL);
1768         }
1769     }
1770 
1771     if (!uri)
1772         return NULL;
1773 
1774     ret = NULL;
1775 
1776 #define NULLCHK(p) if(!p) { \
1777          xmlFreeURI(uri); \
1778          xmlFree(ret); \
1779          return NULL; } \
1780 
1781     if (uri->scheme) {
1782         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1783         NULLCHK(segment)
1784         ret = xmlStrcat(ret, segment);
1785         ret = xmlStrcat(ret, BAD_CAST ":");
1786         xmlFree(segment);
1787     }
1788 
1789     if (uri->authority) {
1790         segment =
1791             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1792         NULLCHK(segment)
1793         ret = xmlStrcat(ret, BAD_CAST "//");
1794         ret = xmlStrcat(ret, segment);
1795         xmlFree(segment);
1796     }
1797 
1798     if (uri->user) {
1799         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1800         NULLCHK(segment)
1801         ret = xmlStrcat(ret,BAD_CAST "//");
1802         ret = xmlStrcat(ret, segment);
1803         ret = xmlStrcat(ret, BAD_CAST "@");
1804         xmlFree(segment);
1805     }
1806 
1807     if (uri->server) {
1808         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1809         NULLCHK(segment)
1810         if (uri->user == NULL)
1811             ret = xmlStrcat(ret, BAD_CAST "//");
1812         ret = xmlStrcat(ret, segment);
1813         xmlFree(segment);
1814     }
1815 
1816     if (uri->port > 0) {
1817         xmlChar port[11];
1818 
1819         snprintf((char *) port, 11, "%d", uri->port);
1820         ret = xmlStrcat(ret, BAD_CAST ":");
1821         ret = xmlStrcat(ret, port);
1822     }
1823 
1824     if (uri->path) {
1825         segment =
1826             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1827         NULLCHK(segment)
1828         ret = xmlStrcat(ret, segment);
1829         xmlFree(segment);
1830     }
1831 
1832     if (uri->query_raw) {
1833         ret = xmlStrcat(ret, BAD_CAST "?");
1834         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1835     }
1836     else if (uri->query) {
1837         segment =
1838             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1839         NULLCHK(segment)
1840         ret = xmlStrcat(ret, BAD_CAST "?");
1841         ret = xmlStrcat(ret, segment);
1842         xmlFree(segment);
1843     }
1844 
1845     if (uri->opaque) {
1846         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1847         NULLCHK(segment)
1848         ret = xmlStrcat(ret, segment);
1849         xmlFree(segment);
1850     }
1851 
1852     if (uri->fragment) {
1853         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1854         NULLCHK(segment)
1855         ret = xmlStrcat(ret, BAD_CAST "#");
1856         ret = xmlStrcat(ret, segment);
1857         xmlFree(segment);
1858     }
1859 
1860     xmlFreeURI(uri);
1861 #undef NULLCHK
1862 
1863     return (ret);
1864 }
1865 
/************************************************************************
 *									*
 *			Public functions				*
 *									*
 ************************************************************************/
1871 
1872 static int
xmlIsAbsolutePath(const xmlChar * path)1873 xmlIsAbsolutePath(const xmlChar *path) {
1874     int c = path[0];
1875 
1876     if (xmlIsPathSeparator(c, 1))
1877         return(1);
1878 
1879 #if defined(_WIN32) || defined(__CYGWIN__)
1880     if ((((c >= 'A') && (c <= 'Z')) ||
1881          ((c >= 'a') && (c <= 'z'))) &&
1882         (path[1] == ':'))
1883         return(1);
1884 #endif
1885 
1886     return(0);
1887 }
1888 
1889 /**
1890  * xmlResolvePath:
1891  * @ref:  the filesystem path
1892  * @base:  the base value
1893  * @out:  pointer to result URI
1894  *
1895  * Resolves a filesystem path from a base path.
1896  *
1897  * Returns 0 on success, -1 if a memory allocation failed or an error
1898  * code if URI or base are invalid.
1899  */
1900 static int
xmlResolvePath(const xmlChar * escRef,const xmlChar * base,xmlChar ** out)1901 xmlResolvePath(const xmlChar *escRef, const xmlChar *base, xmlChar **out) {
1902     const xmlChar *fragment;
1903     xmlChar *tmp = NULL;
1904     xmlChar *ref = NULL;
1905     xmlChar *result = NULL;
1906     int ret = -1;
1907     int i;
1908 
1909     if (out == NULL)
1910         return(1);
1911     *out = NULL;
1912 
1913     if ((escRef == NULL) || (escRef[0] == 0)) {
1914         if ((base == NULL) || (base[0] == 0))
1915             return(1);
1916         ref = xmlStrdup(base);
1917         if (ref == NULL)
1918             goto err_memory;
1919         *out = ref;
1920         return(0);
1921     }
1922 
1923     /*
1924      * If a URI is resolved, we can assume it is a valid URI and not
1925      * a filesystem path. This means we have to unescape the part
1926      * before the fragment.
1927      */
1928     fragment = xmlStrchr(escRef, '#');
1929     if (fragment != NULL) {
1930         tmp = xmlStrndup(escRef, fragment - escRef);
1931         if (tmp == NULL)
1932             goto err_memory;
1933         escRef = tmp;
1934     }
1935 
1936     ref = (xmlChar *) xmlURIUnescapeString((char *) escRef, -1, NULL);
1937     if (ref == NULL)
1938         goto err_memory;
1939 
1940     if ((base == NULL) || (base[0] == 0))
1941         goto done;
1942 
1943     if (xmlIsAbsolutePath(ref))
1944         goto done;
1945 
1946     /*
1947      * Remove last segment from base
1948      */
1949     i = xmlStrlen(base);
1950     while ((i > 0) && !xmlIsPathSeparator(base[i-1], 1))
1951         i--;
1952 
1953     /*
1954      * Concatenate base and ref
1955      */
1956     if (i > 0) {
1957         int refLen = xmlStrlen(ref);
1958 
1959         result = xmlMalloc(i + refLen + 1);
1960         if (result == NULL)
1961             goto err_memory;
1962 
1963         memcpy(result, base, i);
1964         memcpy(result + i, ref, refLen + 1);
1965     }
1966 
1967     /*
1968      * Normalize
1969      */
1970     xmlNormalizePath((char *) result, 1);
1971 
1972 done:
1973     if (result == NULL) {
1974         result = ref;
1975         ref = NULL;
1976     }
1977 
1978     if (fragment != NULL) {
1979         result = xmlStrcat(result, fragment);
1980         if (result == NULL)
1981             goto err_memory;
1982     }
1983 
1984     *out = result;
1985     ret = 0;
1986 
1987 err_memory:
1988     xmlFree(tmp);
1989     xmlFree(ref);
1990     return(ret);
1991 }
1992 
1993 /**
1994  * xmlBuildURISafe:
1995  * @URI:  the URI instance found in the document
1996  * @base:  the base value
1997  * @valPtr:  pointer to result URI
1998  *
 * Computes the final URI of the reference done by checking that
2000  * the given URI is valid, and building the final URI using the
2001  * base URI. This is processed according to section 5.2 of the
2002  * RFC 2396
2003  *
2004  * 5.2. Resolving Relative References to Absolute Form
2005  *
2006  * Available since 2.13.0.
2007  *
2008  * Returns 0 on success, -1 if a memory allocation failed or an error
2009  * code if URI or base are invalid.
2010  */
2011 int
xmlBuildURISafe(const xmlChar * URI,const xmlChar * base,xmlChar ** valPtr)2012 xmlBuildURISafe(const xmlChar *URI, const xmlChar *base, xmlChar **valPtr) {
2013     xmlChar *val = NULL;
2014     int ret, len, indx, cur, out;
2015     xmlURIPtr ref = NULL;
2016     xmlURIPtr bas = NULL;
2017     xmlURIPtr res = NULL;
2018 
2019     if (valPtr == NULL)
2020         return(1);
2021     *valPtr = NULL;
2022 
2023     if (URI == NULL)
2024         return(1);
2025 
2026     if (base == NULL) {
2027         val = xmlStrdup(URI);
2028         if (val == NULL)
2029             return(-1);
2030         *valPtr = val;
2031         return(0);
2032     }
2033 
2034     /*
2035      * 1) The URI reference is parsed into the potential four components and
2036      *    fragment identifier, as described in Section 4.3.
2037      *
2038      *    NOTE that a completely empty URI is treated by modern browsers
2039      *    as a reference to "." rather than as a synonym for the current
2040      *    URI.  Should we do that here?
2041      */
2042     if (URI[0] != 0)
2043         ret = xmlParseURISafe((const char *) URI, &ref);
2044     else
2045         ret = 0;
2046     if (ret != 0)
2047 	goto done;
2048     if ((ref != NULL) && (ref->scheme != NULL)) {
2049 	/*
2050 	 * The URI is absolute don't modify.
2051 	 */
2052 	val = xmlStrdup(URI);
2053         if (val == NULL)
2054             ret = -1;
2055 	goto done;
2056     }
2057 
2058     /*
2059      * If base has no scheme or authority, it is assumed to be a
2060      * filesystem path.
2061      */
2062     if (xmlStrstr(base, BAD_CAST "://") == NULL) {
2063         xmlFreeURI(ref);
2064         return(xmlResolvePath(URI, base, valPtr));
2065     }
2066 
2067     ret = xmlParseURISafe((const char *) base, &bas);
2068     if (ret < 0)
2069         goto done;
2070     if (ret != 0) {
2071 	if (ref) {
2072             ret = 0;
2073 	    val = xmlSaveUri(ref);
2074             if (val == NULL)
2075                 ret = -1;
2076         }
2077 	goto done;
2078     }
2079     if (ref == NULL) {
2080 	/*
2081 	 * the base fragment must be ignored
2082 	 */
2083 	if (bas->fragment != NULL) {
2084 	    xmlFree(bas->fragment);
2085 	    bas->fragment = NULL;
2086 	}
2087 	val = xmlSaveUri(bas);
2088         if (val == NULL)
2089             ret = -1;
2090 	goto done;
2091     }
2092 
2093     /*
2094      * 2) If the path component is empty and the scheme, authority, and
2095      *    query components are undefined, then it is a reference to the
2096      *    current document and we are done.  Otherwise, the reference URI's
2097      *    query and fragment components are defined as found (or not found)
2098      *    within the URI reference and not inherited from the base URI.
2099      *
2100      *    NOTE that in modern browsers, the parsing differs from the above
2101      *    in the following aspect:  the query component is allowed to be
2102      *    defined while still treating this as a reference to the current
2103      *    document.
2104      */
2105     ret = -1;
2106     res = xmlCreateURI();
2107     if (res == NULL)
2108 	goto done;
2109     if ((ref->scheme == NULL) && (ref->path == NULL) &&
2110 	((ref->authority == NULL) && (ref->server == NULL) &&
2111          (ref->port == PORT_EMPTY))) {
2112 	if (bas->scheme != NULL) {
2113 	    res->scheme = xmlMemStrdup(bas->scheme);
2114             if (res->scheme == NULL)
2115                 goto done;
2116         }
2117 	if (bas->authority != NULL) {
2118 	    res->authority = xmlMemStrdup(bas->authority);
2119             if (res->authority == NULL)
2120                 goto done;
2121         } else {
2122 	    if (bas->server != NULL) {
2123 		res->server = xmlMemStrdup(bas->server);
2124                 if (res->server == NULL)
2125                     goto done;
2126             }
2127 	    if (bas->user != NULL) {
2128 		res->user = xmlMemStrdup(bas->user);
2129                 if (res->user == NULL)
2130                     goto done;
2131             }
2132 	    res->port = bas->port;
2133 	}
2134 	if (bas->path != NULL) {
2135 	    res->path = xmlMemStrdup(bas->path);
2136             if (res->path == NULL)
2137                 goto done;
2138         }
2139 	if (ref->query_raw != NULL) {
2140 	    res->query_raw = xmlMemStrdup (ref->query_raw);
2141             if (res->query_raw == NULL)
2142                 goto done;
2143         } else if (ref->query != NULL) {
2144 	    res->query = xmlMemStrdup(ref->query);
2145             if (res->query == NULL)
2146                 goto done;
2147         } else if (bas->query_raw != NULL) {
2148 	    res->query_raw = xmlMemStrdup(bas->query_raw);
2149             if (res->query_raw == NULL)
2150                 goto done;
2151         } else if (bas->query != NULL) {
2152 	    res->query = xmlMemStrdup(bas->query);
2153             if (res->query == NULL)
2154                 goto done;
2155         }
2156 	if (ref->fragment != NULL) {
2157 	    res->fragment = xmlMemStrdup(ref->fragment);
2158             if (res->fragment == NULL)
2159                 goto done;
2160         }
2161 	goto step_7;
2162     }
2163 
2164     /*
2165      * 3) If the scheme component is defined, indicating that the reference
2166      *    starts with a scheme name, then the reference is interpreted as an
2167      *    absolute URI and we are done.  Otherwise, the reference URI's
2168      *    scheme is inherited from the base URI's scheme component.
2169      */
2170     if (ref->scheme != NULL) {
2171 	val = xmlSaveUri(ref);
2172         if (val != NULL)
2173             ret = 0;
2174 	goto done;
2175     }
2176     if (bas->scheme != NULL) {
2177 	res->scheme = xmlMemStrdup(bas->scheme);
2178         if (res->scheme == NULL)
2179             goto done;
2180     }
2181 
2182     if (ref->query_raw != NULL) {
2183 	res->query_raw = xmlMemStrdup(ref->query_raw);
2184         if (res->query_raw == NULL)
2185             goto done;
2186     } else if (ref->query != NULL) {
2187 	res->query = xmlMemStrdup(ref->query);
2188         if (res->query == NULL)
2189             goto done;
2190     }
2191     if (ref->fragment != NULL) {
2192 	res->fragment = xmlMemStrdup(ref->fragment);
2193         if (res->fragment == NULL)
2194             goto done;
2195     }
2196 
2197     /*
2198      * 4) If the authority component is defined, then the reference is a
2199      *    network-path and we skip to step 7.  Otherwise, the reference
2200      *    URI's authority is inherited from the base URI's authority
2201      *    component, which will also be undefined if the URI scheme does not
2202      *    use an authority component.
2203      */
2204     if ((ref->authority != NULL) || (ref->server != NULL) ||
2205          (ref->port != PORT_EMPTY)) {
2206 	if (ref->authority != NULL) {
2207 	    res->authority = xmlMemStrdup(ref->authority);
2208             if (res->authority == NULL)
2209                 goto done;
2210         } else {
2211             if (ref->server != NULL) {
2212                 res->server = xmlMemStrdup(ref->server);
2213                 if (res->server == NULL)
2214                     goto done;
2215             }
2216 	    if (ref->user != NULL) {
2217 		res->user = xmlMemStrdup(ref->user);
2218                 if (res->user == NULL)
2219                     goto done;
2220             }
2221             res->port = ref->port;
2222 	}
2223 	if (ref->path != NULL) {
2224 	    res->path = xmlMemStrdup(ref->path);
2225             if (res->path == NULL)
2226                 goto done;
2227         }
2228 	goto step_7;
2229     }
2230     if (bas->authority != NULL) {
2231 	res->authority = xmlMemStrdup(bas->authority);
2232         if (res->authority == NULL)
2233             goto done;
2234     } else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2235 	if (bas->server != NULL) {
2236 	    res->server = xmlMemStrdup(bas->server);
2237             if (res->server == NULL)
2238                 goto done;
2239         }
2240 	if (bas->user != NULL) {
2241 	    res->user = xmlMemStrdup(bas->user);
2242             if (res->user == NULL)
2243                 goto done;
2244         }
2245 	res->port = bas->port;
2246     }
2247 
2248     /*
2249      * 5) If the path component begins with a slash character ("/"), then
2250      *    the reference is an absolute-path and we skip to step 7.
2251      */
2252     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2253 	res->path = xmlMemStrdup(ref->path);
2254         if (res->path == NULL)
2255             goto done;
2256 	goto step_7;
2257     }
2258 
2259 
2260     /*
2261      * 6) If this step is reached, then we are resolving a relative-path
2262      *    reference.  The relative path needs to be merged with the base
2263      *    URI's path.  Although there are many ways to do this, we will
2264      *    describe a simple method using a separate string buffer.
2265      *
2266      * Allocate a buffer large enough for the result string.
2267      */
2268     len = 2; /* extra / and 0 */
2269     if (ref->path != NULL)
2270 	len += strlen(ref->path);
2271     if (bas->path != NULL)
2272 	len += strlen(bas->path);
2273     res->path = xmlMalloc(len);
2274     if (res->path == NULL)
2275 	goto done;
2276     res->path[0] = 0;
2277 
2278     /*
2279      * a) All but the last segment of the base URI's path component is
2280      *    copied to the buffer.  In other words, any characters after the
2281      *    last (right-most) slash character, if any, are excluded.
2282      */
2283     cur = 0;
2284     out = 0;
2285     if (bas->path != NULL) {
2286 	while (bas->path[cur] != 0) {
2287 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2288 		cur++;
2289 	    if (bas->path[cur] == 0)
2290 		break;
2291 
2292 	    cur++;
2293 	    while (out < cur) {
2294 		res->path[out] = bas->path[out];
2295 		out++;
2296 	    }
2297 	}
2298     }
2299     res->path[out] = 0;
2300 
2301     /*
2302      * b) The reference's path component is appended to the buffer
2303      *    string.
2304      */
2305     if (ref->path != NULL && ref->path[0] != 0) {
2306 	indx = 0;
2307 	/*
2308 	 * Ensure the path includes a '/'
2309 	 */
2310 	if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2311 	    res->path[out++] = '/';
2312 	while (ref->path[indx] != 0) {
2313 	    res->path[out++] = ref->path[indx++];
2314 	}
2315     }
2316     res->path[out] = 0;
2317 
2318     /*
2319      * Steps c) to h) are really path normalization steps
2320      */
2321     xmlNormalizeURIPath(res->path);
2322 
2323 step_7:
2324 
2325     /*
2326      * 7) The resulting URI components, including any inherited from the
2327      *    base URI, are recombined to give the absolute form of the URI
2328      *    reference.
2329      */
2330     val = xmlSaveUri(res);
2331     if (val != NULL)
2332         ret = 0;
2333 
2334 done:
2335     if (ref != NULL)
2336 	xmlFreeURI(ref);
2337     if (bas != NULL)
2338 	xmlFreeURI(bas);
2339     if (res != NULL)
2340 	xmlFreeURI(res);
2341     *valPtr = val;
2342     return(ret);
2343 }
2344 
2345 /**
2346  * xmlBuildURI:
2347  * @URI:  the URI instance found in the document
2348  * @base:  the base value
2349  *
2350  * Computes he final URI of the reference done by checking that
2351  * the given URI is valid, and building the final URI using the
2352  * base URI. This is processed according to section 5.2 of the
2353  * RFC 2396
2354  *
2355  * 5.2. Resolving Relative References to Absolute Form
2356  *
2357  * Returns a new URI string (to be freed by the caller) or NULL in case
2358  *         of error.
2359  */
2360 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)2361 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
2362     xmlChar *out;
2363 
2364     xmlBuildURISafe(URI, base, &out);
2365     return(out);
2366 }
2367 
2368 static int
xmlParseUriOrPath(const char * str,xmlURIPtr * out,int * drive)2369 xmlParseUriOrPath(const char *str, xmlURIPtr *out, int *drive) {
2370     xmlURIPtr uri;
2371     char *buf = NULL;
2372     int ret;
2373 
2374     *out = NULL;
2375     *drive = 0;
2376 
2377     uri = xmlCreateURI();
2378     if (uri == NULL) {
2379         ret = -1;
2380 	goto done;
2381     }
2382 
2383     if (xmlStrstr(BAD_CAST str, BAD_CAST "://") == NULL) {
2384         const char *path;
2385         size_t pathSize;
2386         int prependSlash = 0;
2387 
2388         buf = xmlMemStrdup(str);
2389         if (buf == NULL) {
2390             ret = -1;
2391             goto done;
2392         }
2393         xmlNormalizePath(buf, /* isFile */ 1);
2394 
2395         path = buf;
2396 
2397         if (xmlIsAbsolutePath(BAD_CAST buf)) {
2398 #if defined(_WIN32) || defined(__CYGWIN__)
2399             const char *server = NULL;
2400             int isFileScheme = 0;
2401 #endif
2402 
2403 #if defined(_WIN32) || defined(__CYGWIN__)
2404             if (strncmp(buf, "//?/UNC/", 8) == 0) {
2405                 server = buf + 8;
2406                 isFileScheme = 1;
2407             } else if (strncmp(buf, "//?/", 4) == 0) {
2408                 path = buf + 3;
2409                 isFileScheme = 1;
2410             } else if (strncmp(buf, "//", 2) == 0) {
2411                 server = buf + 2;
2412                 isFileScheme = 1;
2413             }
2414 
2415             if (server != NULL) {
2416                 const char *end = strchr(server, '/');
2417 
2418                 if (end == NULL) {
2419                     uri->server = xmlMemStrdup(server);
2420                     path = "/";
2421                 } else {
2422                     uri->server = (char *) xmlStrndup(BAD_CAST server,
2423                                                       end - server);
2424                     path = end;
2425                 }
2426                 if (uri->server == NULL) {
2427                     ret = -1;
2428                     goto done;
2429                 }
2430             }
2431 
2432             if ((((path[0] >= 'A') && (path[0] <= 'Z')) ||
2433                  ((path[0] >= 'a') && (path[0] <= 'z'))) &&
2434                 (path[1] == ':')) {
2435                 prependSlash = 1;
2436                 isFileScheme = 1;
2437             }
2438 
2439             if (isFileScheme) {
2440                 uri->scheme = xmlMemStrdup("file");
2441                 if (uri->scheme == NULL) {
2442                     ret = -1;
2443                     goto done;
2444                 }
2445 
2446                 if (uri->server == NULL)
2447                     uri->port = PORT_EMPTY_SERVER;
2448             }
2449 #endif
2450         }
2451 
2452         pathSize = strlen(path);
2453         uri->path = xmlMalloc(pathSize + prependSlash + 1);
2454         if (uri->path == NULL) {
2455             ret = -1;
2456             goto done;
2457         }
2458         if (prependSlash) {
2459             uri->path[0] = '/';
2460             memcpy(uri->path + 1, path, pathSize + 1);
2461         } else {
2462             memcpy(uri->path, path, pathSize + 1);
2463         }
2464     } else {
2465 	ret = xmlParseURIReference(uri, str);
2466 	if (ret != 0)
2467 	    goto done;
2468 
2469         xmlNormalizePath(uri->path, /* isFile */ 0);
2470     }
2471 
2472 #if defined(_WIN32) || defined(__CYGWIN__)
2473     if ((uri->path[0] == '/') &&
2474         (((uri->path[1] >= 'A') && (uri->path[1] <= 'Z')) ||
2475          ((uri->path[1] >= 'a') && (uri->path[1] <= 'z'))) &&
2476         (uri->path[2] == ':'))
2477         *drive = uri->path[1];
2478 #endif
2479 
2480     *out = uri;
2481     uri = NULL;
2482     ret = 0;
2483 
2484 done:
2485     xmlFreeURI(uri);
2486     xmlFree(buf);
2487 
2488     return(ret);
2489 }
2490 
2491 /**
2492  * xmlBuildRelativeURISafe:
2493  * @URI:  the URI reference under consideration
2494  * @base:  the base value
2495  * @valPtr:  pointer to result URI
2496  *
2497  * Expresses the URI of the reference in terms relative to the
2498  * base. Some examples of this operation include:
2499  *
2500  *     base = "http://site1.com/docs/book1.html"
2501  *        URI input                        URI returned
2502  *     http://site1.com/docs/pic1.gif   pic1.gif
2503  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2504  *
2505  *     base = "docs/book1.html"
2506  *        URI input                        URI returned
2507  *     docs/pic1.gif                    pic1.gif
2508  *     docs/img/pic1.gif                img/pic1.gif
2509  *     img/pic1.gif                     ../img/pic1.gif
2510  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2511  *
2512  * Available since 2.13.0.
2513  *
2514  * Returns 0 on success, -1 if a memory allocation failed or an error
2515  * code if URI or base are invalid.
2516  */
2517 int
xmlBuildRelativeURISafe(const xmlChar * URI,const xmlChar * base,xmlChar ** valPtr)2518 xmlBuildRelativeURISafe(const xmlChar * URI, const xmlChar * base,
2519                         xmlChar **valPtr)
2520 {
2521     xmlChar *val = NULL;
2522     int ret = 0;
2523     int ix;
2524     int nbslash = 0;
2525     int len;
2526     xmlURIPtr ref = NULL;
2527     xmlURIPtr bas = NULL;
2528     const xmlChar *bptr, *uptr, *rptr;
2529     xmlChar *vptr;
2530     int remove_path = 0;
2531     int refDrive, baseDrive;
2532 
2533     if (valPtr == NULL)
2534         return(1);
2535     *valPtr = NULL;
2536     if ((URI == NULL) || (*URI == 0))
2537 	return(1);
2538 
2539     ret = xmlParseUriOrPath((char *) URI, &ref, &refDrive);
2540     if (ret < 0)
2541         goto done;
2542     if (ret != 0) {
2543         /* Return URI if URI is invalid */
2544         ret = 0;
2545         val = xmlStrdup(URI);
2546         if (val == NULL)
2547             ret = -1;
2548         goto done;
2549     }
2550 
2551     /* Return URI if base is empty */
2552     if ((base == NULL) || (*base == 0))
2553         goto done;
2554 
2555     ret = xmlParseUriOrPath((char *) base, &bas, &baseDrive);
2556     if (ret < 0)
2557         goto done;
2558     if (ret != 0) {
2559         /* Return URI if base is invalid */
2560         ret = 0;
2561         goto done;
2562     }
2563 
2564     /*
2565      * If the scheme / server on the URI differs from the base,
2566      * just return the URI
2567      */
2568     if ((xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2569 	(xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2570         (bas->port != ref->port) ||
2571         (baseDrive != refDrive)) {
2572 	goto done;
2573     }
2574     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2575 	val = xmlStrdup(BAD_CAST "");
2576         if (val == NULL)
2577             ret = -1;
2578 	goto done;
2579     }
2580     if (bas->path == NULL) {
2581 	val = xmlStrdup((xmlChar *)ref->path);
2582         if (val == NULL) {
2583             ret = -1;
2584             goto done;
2585         }
2586 	goto escape;
2587     }
2588     if (ref->path == NULL) {
2589         ref->path = (char *) "/";
2590 	remove_path = 1;
2591     }
2592 
2593     bptr = (xmlChar *) bas->path;
2594     rptr = (xmlChar *) ref->path;
2595 
2596     /*
2597      * Return URI if URI and base aren't both absolute or relative.
2598      */
2599     if ((bptr[0] == '/') != (rptr[0] == '/'))
2600         goto done;
2601 
2602     /*
2603      * At this point we can compare the two paths
2604      */
2605     {
2606         int pos = 0;
2607 
2608         /*
2609          * Next we compare the two strings and find where they first differ
2610          */
2611 	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2612 	    pos++;
2613 
2614 	if (bptr[pos] == rptr[pos]) {
2615 	    val = xmlStrdup(BAD_CAST "");
2616             if (val == NULL)
2617                 ret = -1;
2618 	    goto done;		/* (I can't imagine why anyone would do this) */
2619 	}
2620 
2621 	/*
2622 	 * In URI, "back up" to the last '/' encountered.  This will be the
2623 	 * beginning of the "unique" suffix of URI
2624 	 */
2625 	ix = pos;
2626 	for (; ix > 0; ix--) {
2627 	    if (rptr[ix - 1] == '/')
2628 		break;
2629 	}
2630 	uptr = (xmlChar *)&rptr[ix];
2631 
2632 	/*
2633 	 * In base, count the number of '/' from the differing point
2634 	 */
2635 	for (; bptr[ix] != 0; ix++) {
2636 	    if (bptr[ix] == '/')
2637 		nbslash++;
2638 	}
2639 
2640 	/*
2641 	 * e.g: URI="foo/" base="foo/bar" -> "./"
2642 	 */
2643 	if (nbslash == 0 && !uptr[0]) {
2644 	    val = xmlStrdup(BAD_CAST "./");
2645             if (val == NULL)
2646                 ret = -1;
2647 	    goto done;
2648 	}
2649 
2650 	len = xmlStrlen (uptr) + 1;
2651     }
2652 
2653     if (nbslash == 0) {
2654 	if (uptr != NULL) {
2655 	    /* exception characters from xmlSaveUri */
2656 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2657             if (val == NULL)
2658                 ret = -1;
2659         }
2660 	goto done;
2661     }
2662 
2663     /*
2664      * Allocate just enough space for the returned string -
2665      * length of the remainder of the URI, plus enough space
2666      * for the "../" groups, plus one for the terminator
2667      */
2668     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2669     if (val == NULL) {
2670         ret = -1;
2671 	goto done;
2672     }
2673     vptr = val;
2674     /*
2675      * Put in as many "../" as needed
2676      */
2677     for (; nbslash>0; nbslash--) {
2678 	*vptr++ = '.';
2679 	*vptr++ = '.';
2680 	*vptr++ = '/';
2681     }
2682     /*
2683      * Finish up with the end of the URI
2684      */
2685     if (uptr != NULL) {
2686         if ((vptr > val) && (len > 0) &&
2687 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2688 	    memcpy (vptr, uptr + 1, len - 1);
2689 	    vptr[len - 2] = 0;
2690 	} else {
2691 	    memcpy (vptr, uptr, len);
2692 	    vptr[len - 1] = 0;
2693 	}
2694     } else {
2695 	vptr[len - 1] = 0;
2696     }
2697 
2698 escape:
2699     /* escape the freshly-built path */
2700     vptr = val;
2701     /* exception characters from xmlSaveUri */
2702     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2703     if (val == NULL)
2704         ret = -1;
2705     else
2706         ret = 0;
2707     xmlFree(vptr);
2708 
2709 done:
2710     if ((ret == 0) && (val == NULL)) {
2711         val = xmlSaveUri(ref);
2712         if (val == NULL)
2713             ret = -1;
2714     }
2715 
2716     /*
2717      * Free the working variables
2718      */
2719     if (remove_path != 0)
2720         ref->path = NULL;
2721     if (ref != NULL)
2722 	xmlFreeURI (ref);
2723     if (bas != NULL)
2724 	xmlFreeURI (bas);
2725     if (ret != 0) {
2726         xmlFree(val);
2727         val = NULL;
2728     }
2729 
2730     *valPtr = val;
2731     return(ret);
2732 }
2733 
2734 /*
2735  * xmlBuildRelativeURI:
2736  * @URI:  the URI reference under consideration
2737  * @base:  the base value
2738  *
2739  * See xmlBuildRelativeURISafe.
2740  *
2741  * Returns a new URI string (to be freed by the caller) or NULL in case
2742  * error.
2743  */
2744 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2745 xmlBuildRelativeURI(const xmlChar * URI, const xmlChar * base)
2746 {
2747     xmlChar *val;
2748 
2749     xmlBuildRelativeURISafe(URI, base, &val);
2750     return(val);
2751 }
2752 
2753 /**
2754  * xmlCanonicPath:
2755  * @path:  the resource locator in a filesystem notation
2756  *
2757  * Prepares a path.
2758  *
2759  * If the path contains the substring "://", it is considered a
2760  * Legacy Extended IRI. Characters which aren't allowed in URIs are
2761  * escaped.
2762  *
2763  * Otherwise, the path is considered a filesystem path which is
2764  * copied without modification.
2765  *
2766  * The caller is responsible for freeing the memory occupied
2767  * by the returned string. If there is insufficient memory available, or the
2768  * argument is NULL, the function returns NULL.
2769  *
2770  * Returns the escaped path.
2771  */
2772 xmlChar *
xmlCanonicPath(const xmlChar * path)2773 xmlCanonicPath(const xmlChar *path)
2774 {
2775     xmlChar *ret;
2776 
2777     if (path == NULL)
2778 	return(NULL);
2779 
2780     /* Check if this is an "absolute uri" */
2781     if (xmlStrstr(path, BAD_CAST "://") != NULL) {
2782 	/*
2783          * Escape all characters except reserved, unreserved and the
2784          * percent sign.
2785          *
2786          * xmlURIEscapeStr already keeps unreserved characters, so we
2787          * pass gen-delims, sub-delims and "%" to ignore.
2788          */
2789         ret = xmlURIEscapeStr(path, BAD_CAST ":/?#[]@!$&()*+,;='%");
2790     } else {
2791         ret = xmlStrdup((const xmlChar *) path);
2792     }
2793 
2794     return(ret);
2795 }
2796 
2797 /**
2798  * xmlPathToURI:
2799  * @path:  the resource locator in a filesystem notation
2800  *
2801  * Constructs an URI expressing the existing path
2802  *
2803  * Returns a new URI, or a duplicate of the path parameter if the
2804  * construction fails. The caller is responsible for freeing the memory
2805  * occupied by the returned string. If there is insufficient memory available,
2806  * or the argument is NULL, the function returns NULL.
2807  */
2808 xmlChar *
xmlPathToURI(const xmlChar * path)2809 xmlPathToURI(const xmlChar *path)
2810 {
2811     return(xmlCanonicPath(path));
2812 }
2813