xref: /aosp_15_r20/external/cronet/third_party/libxml/src/uri.c (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * [email protected]
9  */
10 
11 #define IN_LIBXML
12 #include "libxml.h"
13 
14 #include <limits.h>
15 #include <string.h>
16 
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/xmlerror.h>
20 
21 #include "private/error.h"
22 
/**
 * MAX_URI_LENGTH:
 *
 * Upper bound, in bytes, used when growing the output buffer of a
 * serialized URI.
 *
 * The definition of the URI regexp in the RFC has no size limit.
 * In practice URIs are usually relatively short except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4 KB is usually the maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside larger expressions such as "x / MAX_URI_LENGTH".
 */
#define MAX_URI_LENGTH (1024 * 1024)
36 
37 #define PORT_EMPTY           0
38 #define PORT_EMPTY_SERVER   -1
39 
40 static void xmlCleanURI(xmlURIPtr uri);
41 
42 /*
43  * Old rule from 2396 used in legacy handling code
44  * alpha    = lowalpha | upalpha
45  */
46 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
47 
48 
49 /*
50  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
51  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
52  *            "u" | "v" | "w" | "x" | "y" | "z"
53  */
54 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
55 
56 /*
57  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
58  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
59  *           "U" | "V" | "W" | "X" | "Y" | "Z"
60  */
61 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
62 
63 #ifdef IS_DIGIT
64 #undef IS_DIGIT
65 #endif
66 /*
67  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
68  */
69 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
70 
71 /*
72  * alphanum = alpha | digit
73  */
74 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
75 
76 /*
77  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
78  */
79 
80 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
81     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
82     ((x) == '(') || ((x) == ')'))
83 
84 /*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
86  */
87 #define IS_UNWISE(p)                                                    \
88       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
89        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
90        ((*(p) == ']')) || ((*(p) == '`')))
91 
92 /*
93  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
94  *            "[" | "]"
95  */
96 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
97         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
98         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
99         ((x) == ']'))
100 
101 /*
102  * unreserved = alphanum | mark
103  */
104 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
105 
/*
 * Advance the pointer lvalue p to the next character: three bytes when
 * it currently points at '%' (the start of an escape sequence), one byte
 * otherwise. The macro does not verify that two more bytes follow the
 * '%'; it also evaluates p more than once, so p must be free of side
 * effects. The argument is parenthesized so that expressions such as
 * NEXT(*pp) advance *pp and not pp.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
110 
111 /*
112  * Productions from the spec.
113  *
114  *    authority     = server | reg_name
115  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
116  *                        ";" | ":" | "@" | "&" | "=" | "+" )
117  *
118  * path          = [ abs_path | opaque_part ]
119  */
120 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
121 
122 /************************************************************************
123  *									*
124  *                         RFC 3986 parser				*
125  *									*
126  ************************************************************************/
127 
128 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
129 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
130                       ((*(p) >= 'A') && (*(p) <= 'Z')))
131 #define ISA_HEXDIG(p)							\
132        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
133         ((*(p) >= 'A') && (*(p) <= 'F')))
134 
135 /*
136  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
137  *                     / "*" / "+" / "," / ";" / "="
138  */
139 #define ISA_SUB_DELIM(p)						\
140       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
141        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
142        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
143        ((*(p) == '=')) || ((*(p) == '\'')))
144 
145 /*
146  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
147  */
148 #define ISA_GEN_DELIM(p)						\
149       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
150        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
151        ((*(p) == '@')))
152 
153 /*
154  *    reserved      = gen-delims / sub-delims
155  */
156 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
157 
158 /*
159  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
160  */
161 #define ISA_STRICTLY_UNRESERVED(p)					\
162       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
163        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
164 
165 /*
166  *    pct-encoded   = "%" HEXDIG HEXDIG
167  */
168 #define ISA_PCT_ENCODED(p)						\
169      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
170 
171 /*
172  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
173  */
174 #define ISA_PCHAR(u, p)							\
175      (ISA_UNRESERVED(u, p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
176       ((*(p) == ':')) || ((*(p) == '@')))
177 
178 /*
179  * From https://www.w3.org/TR/leiri/
180  *
181  * " " / "<" / ">" / '"' / "{" / "}" / "|"
182  * / "\" / "^" / "`" / %x0-1F / %x7F-D7FF
183  * / %xE000-FFFD / %x10000-10FFFF
184  */
185 #define ISA_UCSCHAR(p) \
186     ((*(p) <= 0x20) || (*(p) >= 0x7F) || (*(p) == '<') || (*(p) == '>') || \
187      (*(p) == '"')  || (*(p) == '{')  || (*(p) == '}') || (*(p) == '|') || \
188      (*(p) == '\\') || (*(p) == '^')  || (*(p) == '`'))
189 
190 #define ISA_UNRESERVED(u, p) (xmlIsUnreserved(u, p))
191 
192 #define XML_URI_ALLOW_UNWISE    1
193 #define XML_URI_NO_UNESCAPE     2
194 #define XML_URI_ALLOW_UCSCHAR   4
195 
196 static int
xmlIsUnreserved(xmlURIPtr uri,const char * cur)197 xmlIsUnreserved(xmlURIPtr uri, const char *cur) {
198     if (uri == NULL)
199         return(0);
200 
201     if (ISA_STRICTLY_UNRESERVED(cur))
202         return(1);
203 
204     if (uri->cleanup & XML_URI_ALLOW_UNWISE) {
205         if (IS_UNWISE(cur))
206             return(1);
207     } else if (uri->cleanup & XML_URI_ALLOW_UCSCHAR) {
208         if (ISA_UCSCHAR(cur))
209             return(1);
210     }
211 
212     return(0);
213 }
214 
215 /**
216  * xmlParse3986Scheme:
217  * @uri:  pointer to an URI structure
218  * @str:  pointer to the string to analyze
219  *
220  * Parse an URI scheme
221  *
222  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223  *
224  * Returns 0 or the error code
225  */
226 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)227 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
228     const char *cur;
229 
230     cur = *str;
231     if (!ISA_ALPHA(cur))
232 	return(1);
233     cur++;
234     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
235            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
236     if (uri != NULL) {
237 	if (uri->scheme != NULL) xmlFree(uri->scheme);
238 	uri->scheme = STRNDUP(*str, cur - *str);
239         if (uri->scheme == NULL)
240             return(-1);
241     }
242     *str = cur;
243     return(0);
244 }
245 
246 /**
247  * xmlParse3986Fragment:
248  * @uri:  pointer to an URI structure
249  * @str:  pointer to the string to analyze
250  *
251  * Parse the query part of an URI
252  *
253  * fragment      = *( pchar / "/" / "?" )
254  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
255  *       in the fragment identifier but this is used very broadly for
256  *       xpointer scheme selection, so we are allowing it here to not break
257  *       for example all the DocBook processing chains.
258  *
259  * Returns 0 or the error code
260  */
261 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)262 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
263 {
264     const char *cur;
265 
266     cur = *str;
267 
268     while ((ISA_PCHAR(uri, cur)) || (*cur == '/') || (*cur == '?') ||
269            (*cur == '[') || (*cur == ']'))
270         NEXT(cur);
271     if (uri != NULL) {
272         if (uri->fragment != NULL)
273             xmlFree(uri->fragment);
274 	if (uri->cleanup & XML_URI_NO_UNESCAPE)
275 	    uri->fragment = STRNDUP(*str, cur - *str);
276 	else
277 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
278         if (uri->fragment == NULL)
279             return (-1);
280     }
281     *str = cur;
282     return (0);
283 }
284 
285 /**
286  * xmlParse3986Query:
287  * @uri:  pointer to an URI structure
288  * @str:  pointer to the string to analyze
289  *
290  * Parse the query part of an URI
291  *
292  * query = *uric
293  *
294  * Returns 0 or the error code
295  */
296 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)297 xmlParse3986Query(xmlURIPtr uri, const char **str)
298 {
299     const char *cur;
300 
301     cur = *str;
302 
303     while ((ISA_PCHAR(uri, cur)) || (*cur == '/') || (*cur == '?'))
304         NEXT(cur);
305     if (uri != NULL) {
306         if (uri->query != NULL)
307             xmlFree(uri->query);
308 	if (uri->cleanup & XML_URI_NO_UNESCAPE)
309 	    uri->query = STRNDUP(*str, cur - *str);
310 	else
311 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
312         if (uri->query == NULL)
313             return (-1);
314 
315 	/* Save the raw bytes of the query as well.
316 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
317 	 */
318 	if (uri->query_raw != NULL)
319 	    xmlFree (uri->query_raw);
320 	uri->query_raw = STRNDUP (*str, cur - *str);
321         if (uri->query_raw == NULL)
322             return (-1);
323     }
324     *str = cur;
325     return (0);
326 }
327 
328 /**
329  * xmlParse3986Port:
330  * @uri:  pointer to an URI structure
331  * @str:  the string to analyze
332  *
333  * Parse a port part and fills in the appropriate fields
334  * of the @uri structure
335  *
336  * port          = *DIGIT
337  *
338  * Returns 0 or the error code
339  */
340 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)341 xmlParse3986Port(xmlURIPtr uri, const char **str)
342 {
343     const char *cur = *str;
344     int port = 0;
345 
346     if (ISA_DIGIT(cur)) {
347 	while (ISA_DIGIT(cur)) {
348             int digit = *cur - '0';
349 
350             if (port > INT_MAX / 10)
351                 return(1);
352             port *= 10;
353             if (port > INT_MAX - digit)
354                 return(1);
355 	    port += digit;
356 
357 	    cur++;
358 	}
359 	if (uri != NULL)
360 	    uri->port = port;
361 	*str = cur;
362 	return(0);
363     }
364     return(1);
365 }
366 
367 /**
368  * xmlParse3986Userinfo:
369  * @uri:  pointer to an URI structure
370  * @str:  the string to analyze
371  *
372  * Parse an user information part and fills in the appropriate fields
373  * of the @uri structure
374  *
375  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
376  *
377  * Returns 0 or the error code
378  */
379 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)380 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
381 {
382     const char *cur;
383 
384     cur = *str;
385     while (ISA_UNRESERVED(uri, cur) || ISA_PCT_ENCODED(cur) ||
386            ISA_SUB_DELIM(cur) || (*cur == ':'))
387 	NEXT(cur);
388     if (*cur == '@') {
389 	if (uri != NULL) {
390 	    if (uri->user != NULL) xmlFree(uri->user);
391 	    if (uri->cleanup & XML_URI_NO_UNESCAPE)
392 		uri->user = STRNDUP(*str, cur - *str);
393 	    else
394 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
395             if (uri->user == NULL)
396                 return(-1);
397 	}
398 	*str = cur;
399 	return(0);
400     }
401     return(1);
402 }
403 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; a multi-digit
 * value with a leading zero, or a three digit value above 255, is
 * rejected. The character following the octet is not examined, which
 * is left to the caller. *@str is only updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[1] is only read once cur[0] is known to be a digit, and cur[2]
     * once cur[1] is a digit, so the terminator is never passed.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;
    } else if (cur[0] == '0') {
        /* no leading zero in a multi-digit octet */
        return(1);
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        len = 2;
    } else {
        int value = (cur[0] - '0') * 100 + (cur[1] - '0') * 10 +
                    (cur[2] - '0');

        /* 100-255 only: rejects 256-259 which used to slip through */
        if (value > 255)
            return(1);
        len = 3;
    }

    *str = cur + len;
    return(0);
}
441 /**
442  * xmlParse3986Host:
443  * @uri:  pointer to an URI structure
444  * @str:  the string to analyze
445  *
446  * Parse an host part and fills in the appropriate fields
447  * of the @uri structure
448  *
449  * host          = IP-literal / IPv4address / reg-name
450  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
451  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
452  * reg-name      = *( unreserved / pct-encoded / sub-delims )
453  *
454  * Returns 0 or the error code
455  */
456 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)457 xmlParse3986Host(xmlURIPtr uri, const char **str)
458 {
459     const char *cur = *str;
460     const char *host;
461 
462     host = cur;
463     /*
464      * IPv6 and future addressing scheme are enclosed between brackets
465      */
466     if (*cur == '[') {
467         cur++;
468 	while ((*cur != ']') && (*cur != 0))
469 	    cur++;
470 	if (*cur != ']')
471 	    return(1);
472 	cur++;
473 	goto found;
474     }
475     /*
476      * try to parse an IPv4
477      */
478     if (ISA_DIGIT(cur)) {
479         if (xmlParse3986DecOctet(&cur) != 0)
480 	    goto not_ipv4;
481 	if (*cur != '.')
482 	    goto not_ipv4;
483 	cur++;
484         if (xmlParse3986DecOctet(&cur) != 0)
485 	    goto not_ipv4;
486 	if (*cur != '.')
487 	    goto not_ipv4;
488         if (xmlParse3986DecOctet(&cur) != 0)
489 	    goto not_ipv4;
490 	if (*cur != '.')
491 	    goto not_ipv4;
492         if (xmlParse3986DecOctet(&cur) != 0)
493 	    goto not_ipv4;
494 	goto found;
495 not_ipv4:
496         cur = *str;
497     }
498     /*
499      * then this should be a hostname which can be empty
500      */
501     while (ISA_UNRESERVED(uri, cur) ||
502            ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
503         NEXT(cur);
504 found:
505     if (uri != NULL) {
506 	if (uri->authority != NULL) xmlFree(uri->authority);
507 	uri->authority = NULL;
508 	if (uri->server != NULL) xmlFree(uri->server);
509 	if (cur != host) {
510 	    if (uri->cleanup & XML_URI_NO_UNESCAPE)
511 		uri->server = STRNDUP(host, cur - host);
512 	    else
513 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
514             if (uri->server == NULL)
515                 return(-1);
516 	} else
517 	    uri->server = NULL;
518     }
519     *str = cur;
520     return(0);
521 }
522 
523 /**
524  * xmlParse3986Authority:
525  * @uri:  pointer to an URI structure
526  * @str:  the string to analyze
527  *
528  * Parse an authority part and fills in the appropriate fields
529  * of the @uri structure
530  *
531  * authority     = [ userinfo "@" ] host [ ":" port ]
532  *
533  * Returns 0 or the error code
534  */
535 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)536 xmlParse3986Authority(xmlURIPtr uri, const char **str)
537 {
538     const char *cur;
539     int ret;
540 
541     cur = *str;
542     /*
543      * try to parse an userinfo and check for the trailing @
544      */
545     ret = xmlParse3986Userinfo(uri, &cur);
546     if (ret < 0)
547         return(ret);
548     if ((ret != 0) || (*cur != '@'))
549         cur = *str;
550     else
551         cur++;
552     ret = xmlParse3986Host(uri, &cur);
553     if (ret != 0) return(ret);
554     if (*cur == ':') {
555         cur++;
556         ret = xmlParse3986Port(uri, &cur);
557 	if (ret != 0) return(ret);
558     }
559     *str = cur;
560     return(0);
561 }
562 
563 /**
564  * xmlParse3986Segment:
565  * @str:  the string to analyze
566  * @forbid: an optional forbidden character
567  * @empty: allow an empty segment
568  *
569  * Parse a segment and fills in the appropriate fields
570  * of the @uri structure
571  *
572  * segment       = *pchar
573  * segment-nz    = 1*pchar
574  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
575  *               ; non-zero-length segment without any colon ":"
576  *
577  * Returns 0 or the error code
578  */
579 static int
xmlParse3986Segment(xmlURIPtr uri,const char ** str,char forbid,int empty)580 xmlParse3986Segment(xmlURIPtr uri, const char **str, char forbid, int empty)
581 {
582     const char *cur;
583 
584     cur = *str;
585     if (!ISA_PCHAR(uri, cur)) {
586         if (empty)
587 	    return(0);
588 	return(1);
589     }
590     while (ISA_PCHAR(uri, cur) && (*cur != forbid))
591         NEXT(cur);
592     *str = cur;
593     return (0);
594 }
595 
596 /**
597  * xmlParse3986PathAbEmpty:
598  * @uri:  pointer to an URI structure
599  * @str:  the string to analyze
600  *
601  * Parse an path absolute or empty and fills in the appropriate fields
602  * of the @uri structure
603  *
604  * path-abempty  = *( "/" segment )
605  *
606  * Returns 0 or the error code
607  */
608 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)609 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
610 {
611     const char *cur;
612     int ret;
613 
614     cur = *str;
615 
616     while (*cur == '/') {
617         cur++;
618 	ret = xmlParse3986Segment(uri, &cur, 0, 1);
619 	if (ret != 0) return(ret);
620     }
621     if (uri != NULL) {
622 	if (uri->path != NULL) xmlFree(uri->path);
623         if (*str != cur) {
624             if (uri->cleanup & XML_URI_NO_UNESCAPE)
625                 uri->path = STRNDUP(*str, cur - *str);
626             else
627                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
628             if (uri->path == NULL)
629                 return (-1);
630         } else {
631             uri->path = NULL;
632         }
633     }
634     *str = cur;
635     return (0);
636 }
637 
638 /**
639  * xmlParse3986PathAbsolute:
640  * @uri:  pointer to an URI structure
641  * @str:  the string to analyze
642  *
643  * Parse an path absolute and fills in the appropriate fields
644  * of the @uri structure
645  *
646  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
647  *
648  * Returns 0 or the error code
649  */
650 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)651 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
652 {
653     const char *cur;
654     int ret;
655 
656     cur = *str;
657 
658     if (*cur != '/')
659         return(1);
660     cur++;
661     ret = xmlParse3986Segment(uri, &cur, 0, 0);
662     if (ret == 0) {
663 	while (*cur == '/') {
664 	    cur++;
665 	    ret = xmlParse3986Segment(uri, &cur, 0, 1);
666 	    if (ret != 0) return(ret);
667 	}
668     }
669     if (uri != NULL) {
670 	if (uri->path != NULL) xmlFree(uri->path);
671         if (cur != *str) {
672             if (uri->cleanup & XML_URI_NO_UNESCAPE)
673                 uri->path = STRNDUP(*str, cur - *str);
674             else
675                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
676             if (uri->path == NULL)
677                 return (-1);
678         } else {
679             uri->path = NULL;
680         }
681     }
682     *str = cur;
683     return (0);
684 }
685 
686 /**
687  * xmlParse3986PathRootless:
688  * @uri:  pointer to an URI structure
689  * @str:  the string to analyze
690  *
691  * Parse an path without root and fills in the appropriate fields
692  * of the @uri structure
693  *
694  * path-rootless = segment-nz *( "/" segment )
695  *
696  * Returns 0 or the error code
697  */
698 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)699 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
700 {
701     const char *cur;
702     int ret;
703 
704     cur = *str;
705 
706     ret = xmlParse3986Segment(uri, &cur, 0, 0);
707     if (ret != 0) return(ret);
708     while (*cur == '/') {
709         cur++;
710 	ret = xmlParse3986Segment(uri, &cur, 0, 1);
711 	if (ret != 0) return(ret);
712     }
713     if (uri != NULL) {
714 	if (uri->path != NULL) xmlFree(uri->path);
715         if (cur != *str) {
716             if (uri->cleanup & XML_URI_NO_UNESCAPE)
717                 uri->path = STRNDUP(*str, cur - *str);
718             else
719                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
720             if (uri->path == NULL)
721                 return (-1);
722         } else {
723             uri->path = NULL;
724         }
725     }
726     *str = cur;
727     return (0);
728 }
729 
730 /**
731  * xmlParse3986PathNoScheme:
732  * @uri:  pointer to an URI structure
733  * @str:  the string to analyze
734  *
735  * Parse an path which is not a scheme and fills in the appropriate fields
736  * of the @uri structure
737  *
738  * path-noscheme = segment-nz-nc *( "/" segment )
739  *
740  * Returns 0 or the error code
741  */
742 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)743 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
744 {
745     const char *cur;
746     int ret;
747 
748     cur = *str;
749 
750     ret = xmlParse3986Segment(uri, &cur, ':', 0);
751     if (ret != 0) return(ret);
752     while (*cur == '/') {
753         cur++;
754 	ret = xmlParse3986Segment(uri, &cur, 0, 1);
755 	if (ret != 0) return(ret);
756     }
757     if (uri != NULL) {
758 	if (uri->path != NULL) xmlFree(uri->path);
759         if (cur != *str) {
760             if (uri->cleanup & XML_URI_NO_UNESCAPE)
761                 uri->path = STRNDUP(*str, cur - *str);
762             else
763                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
764             if (uri->path == NULL)
765                 return (-1);
766         } else {
767             uri->path = NULL;
768         }
769     }
770     *str = cur;
771     return (0);
772 }
773 
774 /**
775  * xmlParse3986HierPart:
776  * @uri:  pointer to an URI structure
777  * @str:  the string to analyze
778  *
779  * Parse an hierarchical part and fills in the appropriate fields
780  * of the @uri structure
781  *
782  * hier-part     = "//" authority path-abempty
783  *                / path-absolute
784  *                / path-rootless
785  *                / path-empty
786  *
787  * Returns 0 or the error code
788  */
789 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)790 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
791 {
792     const char *cur;
793     int ret;
794 
795     cur = *str;
796 
797     if ((*cur == '/') && (*(cur + 1) == '/')) {
798         cur += 2;
799 	ret = xmlParse3986Authority(uri, &cur);
800 	if (ret != 0) return(ret);
801         /*
802          * An empty server is marked with a special URI value.
803          */
804 	if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
805 	    uri->port = PORT_EMPTY_SERVER;
806 	ret = xmlParse3986PathAbEmpty(uri, &cur);
807 	if (ret != 0) return(ret);
808 	*str = cur;
809 	return(0);
810     } else if (*cur == '/') {
811         ret = xmlParse3986PathAbsolute(uri, &cur);
812 	if (ret != 0) return(ret);
813     } else if (ISA_PCHAR(uri, cur)) {
814         ret = xmlParse3986PathRootless(uri, &cur);
815 	if (ret != 0) return(ret);
816     } else {
817 	/* path-empty is effectively empty */
818 	if (uri != NULL) {
819 	    if (uri->path != NULL) xmlFree(uri->path);
820 	    uri->path = NULL;
821 	}
822     }
823     *str = cur;
824     return (0);
825 }
826 
827 /**
828  * xmlParse3986RelativeRef:
829  * @uri:  pointer to an URI structure
830  * @str:  the string to analyze
831  *
832  * Parse an URI string and fills in the appropriate fields
833  * of the @uri structure
834  *
835  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
836  * relative-part = "//" authority path-abempty
837  *               / path-absolute
838  *               / path-noscheme
839  *               / path-empty
840  *
841  * Returns 0 or the error code
842  */
843 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)844 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
845     int ret;
846 
847     if ((*str == '/') && (*(str + 1) == '/')) {
848         str += 2;
849 	ret = xmlParse3986Authority(uri, &str);
850 	if (ret != 0) return(ret);
851 	ret = xmlParse3986PathAbEmpty(uri, &str);
852 	if (ret != 0) return(ret);
853     } else if (*str == '/') {
854 	ret = xmlParse3986PathAbsolute(uri, &str);
855 	if (ret != 0) return(ret);
856     } else if (ISA_PCHAR(uri, str)) {
857         ret = xmlParse3986PathNoScheme(uri, &str);
858 	if (ret != 0) return(ret);
859     } else {
860 	/* path-empty is effectively empty */
861 	if (uri != NULL) {
862 	    if (uri->path != NULL) xmlFree(uri->path);
863 	    uri->path = NULL;
864 	}
865     }
866 
867     if (*str == '?') {
868 	str++;
869 	ret = xmlParse3986Query(uri, &str);
870 	if (ret != 0) return(ret);
871     }
872     if (*str == '#') {
873 	str++;
874 	ret = xmlParse3986Fragment(uri, &str);
875 	if (ret != 0) return(ret);
876     }
877     if (*str != 0) {
878 	xmlCleanURI(uri);
879 	return(1);
880     }
881     return(0);
882 }
883 
884 
885 /**
886  * xmlParse3986URI:
887  * @uri:  pointer to an URI structure
888  * @str:  the string to analyze
889  *
890  * Parse an URI string and fills in the appropriate fields
891  * of the @uri structure
892  *
893  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
894  *
895  * Returns 0 or the error code
896  */
897 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)898 xmlParse3986URI(xmlURIPtr uri, const char *str) {
899     int ret;
900 
901     ret = xmlParse3986Scheme(uri, &str);
902     if (ret != 0) return(ret);
903     if (*str != ':') {
904 	return(1);
905     }
906     str++;
907     ret = xmlParse3986HierPart(uri, &str);
908     if (ret != 0) return(ret);
909     if (*str == '?') {
910 	str++;
911 	ret = xmlParse3986Query(uri, &str);
912 	if (ret != 0) return(ret);
913     }
914     if (*str == '#') {
915 	str++;
916 	ret = xmlParse3986Fragment(uri, &str);
917 	if (ret != 0) return(ret);
918     }
919     if (*str != 0) {
920 	xmlCleanURI(uri);
921 	return(1);
922     }
923     return(0);
924 }
925 
926 /**
927  * xmlParse3986URIReference:
928  * @uri:  pointer to an URI structure
929  * @str:  the string to analyze
930  *
931  * Parse an URI reference string and fills in the appropriate fields
932  * of the @uri structure
933  *
934  * URI-reference = URI / relative-ref
935  *
936  * Returns 0 or the error code
937  */
938 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)939 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
940     int ret;
941 
942     if (str == NULL)
943 	return(-1);
944     xmlCleanURI(uri);
945 
946     /*
947      * Try first to parse absolute refs, then fallback to relative if
948      * it fails.
949      */
950     ret = xmlParse3986URI(uri, str);
951     if (ret < 0)
952         return(ret);
953     if (ret != 0) {
954 	xmlCleanURI(uri);
955         ret = xmlParse3986RelativeRef(uri, str);
956 	if (ret != 0) {
957 	    xmlCleanURI(uri);
958 	    return(ret);
959 	}
960     }
961     return(0);
962 }
963 
964 /**
965  * xmlParseURISafe:
966  * @str:  the URI string to analyze
967  * @uriOut:  optional pointer to parsed URI
968  *
969  * Parse an URI based on RFC 3986
970  *
971  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
972  *
973  * Returns 0 on success, an error code (typically 1) if the URI is invalid
974  * or -1 if a memory allocation failed.
975  */
976 int
xmlParseURISafe(const char * str,xmlURIPtr * uriOut)977 xmlParseURISafe(const char *str, xmlURIPtr *uriOut) {
978     xmlURIPtr uri;
979     int ret;
980 
981     if (uriOut != NULL)
982         *uriOut = NULL;
983     if (str == NULL)
984 	return(1);
985 
986     uri = xmlCreateURI();
987     if (uri == NULL)
988         return(-1);
989 
990     ret = xmlParse3986URIReference(uri, str);
991     if (ret) {
992         xmlFreeURI(uri);
993         return(ret);
994     }
995 
996     if (uriOut != NULL)
997         *uriOut = uri;
998     return(0);
999 }
1000 
1001 /**
1002  * xmlParseURI:
1003  * @str:  the URI string to analyze
1004  *
1005  * Parse an URI based on RFC 3986
1006  *
1007  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1008  *
1009  * Returns a newly built xmlURIPtr or NULL in case of error
1010  */
1011 xmlURIPtr
xmlParseURI(const char * str)1012 xmlParseURI(const char *str) {
1013     xmlURIPtr uri;
1014     xmlParseURISafe(str, &uri);
1015     return(uri);
1016 }
1017 
1018 /**
1019  * xmlParseURIReference:
1020  * @uri:  pointer to an URI structure
1021  * @str:  the string to analyze
1022  *
1023  * Parse an URI reference string based on RFC 3986 and fills in the
1024  * appropriate fields of the @uri structure
1025  *
1026  * URI-reference = URI / relative-ref
1027  *
1028  * Returns 0 or the error code
1029  */
1030 int
xmlParseURIReference(xmlURIPtr uri,const char * str)1031 xmlParseURIReference(xmlURIPtr uri, const char *str) {
1032     return(xmlParse3986URIReference(uri, str));
1033 }
1034 
1035 /**
1036  * xmlParseURIRaw:
1037  * @str:  the URI string to analyze
1038  * @raw:  if 1 unescaping of URI pieces are disabled
1039  *
1040  * Parse an URI but allows to keep intact the original fragments.
1041  *
1042  * URI-reference = URI / relative-ref
1043  *
1044  * Returns a newly built xmlURIPtr or NULL in case of error
1045  */
1046 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)1047 xmlParseURIRaw(const char *str, int raw) {
1048     xmlURIPtr uri;
1049     int ret;
1050 
1051     if (str == NULL)
1052 	return(NULL);
1053     uri = xmlCreateURI();
1054     if (uri != NULL) {
1055         if (raw) {
1056 	    uri->cleanup |= XML_URI_NO_UNESCAPE;
1057 	}
1058 	ret = xmlParseURIReference(uri, str);
1059         if (ret) {
1060 	    xmlFreeURI(uri);
1061 	    return(NULL);
1062 	}
1063     }
1064     return(uri);
1065 }
1066 
1067 /************************************************************************
1068  *									*
1069  *			Generic URI structure functions			*
1070  *									*
1071  ************************************************************************/
1072 
1073 /**
1074  * xmlCreateURI:
1075  *
1076  * Simply creates an empty xmlURI
1077  *
1078  * Returns the new structure or NULL in case of error
1079  */
1080 xmlURIPtr
xmlCreateURI(void)1081 xmlCreateURI(void) {
1082     xmlURIPtr ret;
1083 
1084     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1085     if (ret == NULL)
1086 	return(NULL);
1087     memset(ret, 0, sizeof(xmlURI));
1088     ret->port = PORT_EMPTY;
1089     return(ret);
1090 }
1091 
1092 /**
1093  * xmlSaveUriRealloc:
1094  *
1095  * Function to handle properly a reallocation when saving an URI
1096  * Also imposes some limit on the length of an URI string output
1097  */
1098 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1099 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1100     xmlChar *temp;
1101     int tmp;
1102 
1103     if (*max > MAX_URI_LENGTH)
1104         return(NULL);
1105     tmp = *max * 2;
1106     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1107     if (temp == NULL)
1108         return(NULL);
1109     *max = tmp;
1110     return(temp);
1111 }
1112 
1113 /**
1114  * xmlSaveUri:
1115  * @uri:  pointer to an xmlURI
1116  *
1117  * Save the URI as an escaped string
1118  *
1119  * Returns a new string (to be deallocated by caller)
1120  */
1121 xmlChar *
xmlSaveUri(xmlURIPtr uri)1122 xmlSaveUri(xmlURIPtr uri) {
1123     xmlChar *ret = NULL;
1124     xmlChar *temp;
1125     const char *p;
1126     int len;
1127     int max;
1128 
1129     if (uri == NULL) return(NULL);
1130 
1131 
1132     max = 80;
1133     ret = (xmlChar *) xmlMallocAtomic(max + 1);
1134     if (ret == NULL)
1135 	return(NULL);
1136     len = 0;
1137 
1138     if (uri->scheme != NULL) {
1139 	p = uri->scheme;
1140 	while (*p != 0) {
1141 	    if (len >= max) {
1142                 temp = xmlSaveUriRealloc(ret, &max);
1143                 if (temp == NULL) goto mem_error;
1144 		ret = temp;
1145 	    }
1146 	    ret[len++] = *p++;
1147 	}
1148 	if (len >= max) {
1149             temp = xmlSaveUriRealloc(ret, &max);
1150             if (temp == NULL) goto mem_error;
1151             ret = temp;
1152 	}
1153 	ret[len++] = ':';
1154     }
1155     if (uri->opaque != NULL) {
1156 	p = uri->opaque;
1157 	while (*p != 0) {
1158 	    if (len + 3 >= max) {
1159                 temp = xmlSaveUriRealloc(ret, &max);
1160                 if (temp == NULL) goto mem_error;
1161                 ret = temp;
1162 	    }
1163 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1164 		ret[len++] = *p++;
1165 	    else {
1166 		int val = *(unsigned char *)p++;
1167 		int hi = val / 0x10, lo = val % 0x10;
1168 		ret[len++] = '%';
1169 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1170 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1171 	    }
1172 	}
1173     } else {
1174 	if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1175 	    if (len + 3 >= max) {
1176                 temp = xmlSaveUriRealloc(ret, &max);
1177                 if (temp == NULL) goto mem_error;
1178                 ret = temp;
1179 	    }
1180 	    ret[len++] = '/';
1181 	    ret[len++] = '/';
1182 	    if (uri->user != NULL) {
1183 		p = uri->user;
1184 		while (*p != 0) {
1185 		    if (len + 3 >= max) {
1186                         temp = xmlSaveUriRealloc(ret, &max);
1187                         if (temp == NULL) goto mem_error;
1188                         ret = temp;
1189 		    }
1190 		    if ((IS_UNRESERVED(*(p))) ||
1191 			((*(p) == ';')) || ((*(p) == ':')) ||
1192 			((*(p) == '&')) || ((*(p) == '=')) ||
1193 			((*(p) == '+')) || ((*(p) == '$')) ||
1194 			((*(p) == ',')))
1195 			ret[len++] = *p++;
1196 		    else {
1197 			int val = *(unsigned char *)p++;
1198 			int hi = val / 0x10, lo = val % 0x10;
1199 			ret[len++] = '%';
1200 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1201 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1202 		    }
1203 		}
1204 		if (len + 3 >= max) {
1205                     temp = xmlSaveUriRealloc(ret, &max);
1206                     if (temp == NULL) goto mem_error;
1207                     ret = temp;
1208 		}
1209 		ret[len++] = '@';
1210 	    }
1211 	    if (uri->server != NULL) {
1212 		p = uri->server;
1213 		while (*p != 0) {
1214 		    if (len >= max) {
1215 			temp = xmlSaveUriRealloc(ret, &max);
1216 			if (temp == NULL) goto mem_error;
1217 			ret = temp;
1218 		    }
1219                     /* TODO: escaping? */
1220 		    ret[len++] = (xmlChar) *p++;
1221 		}
1222 	    }
1223             if (uri->port > 0) {
1224                 if (len + 10 >= max) {
1225                     temp = xmlSaveUriRealloc(ret, &max);
1226                     if (temp == NULL) goto mem_error;
1227                     ret = temp;
1228                 }
1229                 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1230             }
1231 	} else if (uri->authority != NULL) {
1232 	    if (len + 3 >= max) {
1233                 temp = xmlSaveUriRealloc(ret, &max);
1234                 if (temp == NULL) goto mem_error;
1235                 ret = temp;
1236 	    }
1237 	    ret[len++] = '/';
1238 	    ret[len++] = '/';
1239 	    p = uri->authority;
1240 	    while (*p != 0) {
1241 		if (len + 3 >= max) {
1242                     temp = xmlSaveUriRealloc(ret, &max);
1243                     if (temp == NULL) goto mem_error;
1244                     ret = temp;
1245 		}
1246 		if ((IS_UNRESERVED(*(p))) ||
1247                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1248                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1249                     ((*(p) == '=')) || ((*(p) == '+')))
1250 		    ret[len++] = *p++;
1251 		else {
1252 		    int val = *(unsigned char *)p++;
1253 		    int hi = val / 0x10, lo = val % 0x10;
1254 		    ret[len++] = '%';
1255 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1256 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1257 		}
1258 	    }
1259 	} else if (uri->scheme != NULL) {
1260 	    if (len + 3 >= max) {
1261                 temp = xmlSaveUriRealloc(ret, &max);
1262                 if (temp == NULL) goto mem_error;
1263                 ret = temp;
1264 	    }
1265 	}
1266 	if (uri->path != NULL) {
1267 	    p = uri->path;
1268 	    /*
1269 	     * the colon in file:///d: should not be escaped or
1270 	     * Windows accesses fail later.
1271 	     */
1272 	    if ((uri->scheme != NULL) &&
1273 		(p[0] == '/') &&
1274 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1275 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1276 		(p[2] == ':') &&
1277 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1278 		if (len + 3 >= max) {
1279                     temp = xmlSaveUriRealloc(ret, &max);
1280                     if (temp == NULL) goto mem_error;
1281                     ret = temp;
1282 		}
1283 		ret[len++] = *p++;
1284 		ret[len++] = *p++;
1285 		ret[len++] = *p++;
1286 	    }
1287 	    while (*p != 0) {
1288 		if (len + 3 >= max) {
1289                     temp = xmlSaveUriRealloc(ret, &max);
1290                     if (temp == NULL) goto mem_error;
1291                     ret = temp;
1292 		}
1293 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1294                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1295 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1296 	            ((*(p) == ',')))
1297 		    ret[len++] = *p++;
1298 		else {
1299 		    int val = *(unsigned char *)p++;
1300 		    int hi = val / 0x10, lo = val % 0x10;
1301 		    ret[len++] = '%';
1302 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1303 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1304 		}
1305 	    }
1306 	}
1307 	if (uri->query_raw != NULL) {
1308 	    if (len + 1 >= max) {
1309                 temp = xmlSaveUriRealloc(ret, &max);
1310                 if (temp == NULL) goto mem_error;
1311                 ret = temp;
1312 	    }
1313 	    ret[len++] = '?';
1314 	    p = uri->query_raw;
1315 	    while (*p != 0) {
1316 		if (len + 1 >= max) {
1317                     temp = xmlSaveUriRealloc(ret, &max);
1318                     if (temp == NULL) goto mem_error;
1319                     ret = temp;
1320 		}
1321 		ret[len++] = *p++;
1322 	    }
1323 	} else if (uri->query != NULL) {
1324 	    if (len + 3 >= max) {
1325                 temp = xmlSaveUriRealloc(ret, &max);
1326                 if (temp == NULL) goto mem_error;
1327                 ret = temp;
1328 	    }
1329 	    ret[len++] = '?';
1330 	    p = uri->query;
1331 	    while (*p != 0) {
1332 		if (len + 3 >= max) {
1333                     temp = xmlSaveUriRealloc(ret, &max);
1334                     if (temp == NULL) goto mem_error;
1335                     ret = temp;
1336 		}
1337 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1338 		    ret[len++] = *p++;
1339 		else {
1340 		    int val = *(unsigned char *)p++;
1341 		    int hi = val / 0x10, lo = val % 0x10;
1342 		    ret[len++] = '%';
1343 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1344 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1345 		}
1346 	    }
1347 	}
1348     }
1349     if (uri->fragment != NULL) {
1350 	if (len + 3 >= max) {
1351             temp = xmlSaveUriRealloc(ret, &max);
1352             if (temp == NULL) goto mem_error;
1353             ret = temp;
1354 	}
1355 	ret[len++] = '#';
1356 	p = uri->fragment;
1357 	while (*p != 0) {
1358 	    if (len + 3 >= max) {
1359                 temp = xmlSaveUriRealloc(ret, &max);
1360                 if (temp == NULL) goto mem_error;
1361                 ret = temp;
1362 	    }
1363 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1364 		ret[len++] = *p++;
1365 	    else {
1366 		int val = *(unsigned char *)p++;
1367 		int hi = val / 0x10, lo = val % 0x10;
1368 		ret[len++] = '%';
1369 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1370 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1371 	    }
1372 	}
1373     }
1374     if (len >= max) {
1375         temp = xmlSaveUriRealloc(ret, &max);
1376         if (temp == NULL) goto mem_error;
1377         ret = temp;
1378     }
1379     ret[len] = 0;
1380     return(ret);
1381 
1382 mem_error:
1383     xmlFree(ret);
1384     return(NULL);
1385 }
1386 
1387 /**
1388  * xmlPrintURI:
1389  * @stream:  a FILE* for the output
1390  * @uri:  pointer to an xmlURI
1391  *
1392  * Prints the URI in the stream @stream.
1393  */
1394 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1395 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1396     xmlChar *out;
1397 
1398     out = xmlSaveUri(uri);
1399     if (out != NULL) {
1400 	fprintf(stream, "%s", (char *) out);
1401 	xmlFree(out);
1402     }
1403 }
1404 
1405 /**
1406  * xmlCleanURI:
1407  * @uri:  pointer to an xmlURI
1408  *
1409  * Make sure the xmlURI struct is free of content
1410  */
1411 static void
xmlCleanURI(xmlURIPtr uri)1412 xmlCleanURI(xmlURIPtr uri) {
1413     if (uri == NULL) return;
1414 
1415     if (uri->scheme != NULL) xmlFree(uri->scheme);
1416     uri->scheme = NULL;
1417     if (uri->server != NULL) xmlFree(uri->server);
1418     uri->server = NULL;
1419     if (uri->user != NULL) xmlFree(uri->user);
1420     uri->user = NULL;
1421     if (uri->path != NULL) xmlFree(uri->path);
1422     uri->path = NULL;
1423     if (uri->fragment != NULL) xmlFree(uri->fragment);
1424     uri->fragment = NULL;
1425     if (uri->opaque != NULL) xmlFree(uri->opaque);
1426     uri->opaque = NULL;
1427     if (uri->authority != NULL) xmlFree(uri->authority);
1428     uri->authority = NULL;
1429     if (uri->query != NULL) xmlFree(uri->query);
1430     uri->query = NULL;
1431     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1432     uri->query_raw = NULL;
1433 }
1434 
1435 /**
1436  * xmlFreeURI:
1437  * @uri:  pointer to an xmlURI
1438  *
1439  * Free up the xmlURI struct
1440  */
1441 void
xmlFreeURI(xmlURIPtr uri)1442 xmlFreeURI(xmlURIPtr uri) {
1443     if (uri == NULL) return;
1444 
1445     if (uri->scheme != NULL) xmlFree(uri->scheme);
1446     if (uri->server != NULL) xmlFree(uri->server);
1447     if (uri->user != NULL) xmlFree(uri->user);
1448     if (uri->path != NULL) xmlFree(uri->path);
1449     if (uri->fragment != NULL) xmlFree(uri->fragment);
1450     if (uri->opaque != NULL) xmlFree(uri->opaque);
1451     if (uri->authority != NULL) xmlFree(uri->authority);
1452     if (uri->query != NULL) xmlFree(uri->query);
1453     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1454     xmlFree(uri);
1455 }
1456 
1457 /************************************************************************
1458  *									*
1459  *			Helper functions				*
1460  *									*
1461  ************************************************************************/
1462 
/**
 * xmlIsPathSeparator:
 * @c:  the character to test
 * @isFile:  true for filesystem paths, false for URIs
 *
 * Tests whether @c separates path segments. '/' always does; when
 * built for _WIN32, a backslash does as well for filesystem paths.
 *
 * Returns 1 if @c is a separator, 0 otherwise
 */
static int
xmlIsPathSeparator(int c, int isFile) {
    int isSep = (c == '/');

#ifdef _WIN32
    if ((!isSep) && isFile && (c == '\\'))
        isSep = 1;
#else
    /* Only consulted on Windows */
    (void) isFile;
#endif

    return(isSep);
}
1477 
1478 /**
1479  * xmlNormalizePath:
1480  * @path:  pointer to the path string
1481  * @isFile:  true for filesystem paths, false for URIs
1482  *
1483  * Normalize a filesystem path or URI.
1484  *
1485  * Returns 0 or an error code
1486  */
1487 static int
xmlNormalizePath(char * path,int isFile)1488 xmlNormalizePath(char *path, int isFile) {
1489     char *cur, *out;
1490     int numSeg = 0;
1491 
1492     if (path == NULL)
1493 	return(-1);
1494 
1495     cur = path;
1496     out = path;
1497 
1498     if (*cur == 0)
1499         return(0);
1500 
1501     if (xmlIsPathSeparator(*cur, isFile)) {
1502         cur++;
1503         *out++ = '/';
1504     }
1505 
1506     while (*cur != 0) {
1507         /*
1508          * At this point, out is either empty or ends with a separator.
1509          * Collapse multiple separators first.
1510          */
1511         while (xmlIsPathSeparator(*cur, isFile)) {
1512 #ifdef _WIN32
1513             /* Allow two separators at start of path */
1514             if ((isFile) && (out == path + 1))
1515                 *out++ = '/';
1516 #endif
1517             cur++;
1518         }
1519 
1520         if (*cur == '.') {
1521             if (cur[1] == 0) {
1522                 /* Ignore "." at end of path */
1523                 break;
1524             } else if (xmlIsPathSeparator(cur[1], isFile)) {
1525                 /* Skip "./" */
1526                 cur += 2;
1527                 continue;
1528             } else if ((cur[1] == '.') &&
1529                        ((cur[2] == 0) || xmlIsPathSeparator(cur[2], isFile))) {
1530                 if (numSeg > 0) {
1531                     /* Handle ".." by removing last segment */
1532                     do {
1533                         out--;
1534                     } while ((out > path) &&
1535                              !xmlIsPathSeparator(out[-1], isFile));
1536                     numSeg--;
1537 
1538                     if (cur[2] == 0)
1539                         break;
1540                     cur += 3;
1541                     continue;
1542                 } else if (out[0] == '/') {
1543                     /* Ignore extraneous ".." in absolute paths */
1544                     if (cur[2] == 0)
1545                         break;
1546                     cur += 3;
1547                     continue;
1548                 } else {
1549                     /* Keep "../" at start of relative path */
1550                     numSeg--;
1551                 }
1552             }
1553         }
1554 
1555         /* Copy segment */
1556         while ((*cur != 0) && !xmlIsPathSeparator(*cur, isFile)) {
1557             *out++ = *cur++;
1558         }
1559 
1560         /* Copy separator */
1561         if (*cur != 0) {
1562             cur++;
1563             *out++ = '/';
1564         }
1565 
1566         numSeg++;
1567     }
1568 
1569     /* Keep "." if output is empty and it's a file */
1570     if ((isFile) && (out <= path))
1571         *out++ = '.';
1572     *out = 0;
1573 
1574     return(0);
1575 }
1576 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Implemented by calling
 * xmlNormalizePath() in URI (non-file) mode.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0 or an error code
 */
int
xmlNormalizeURIPath(char *path) {
    int rc = xmlNormalizePath(path, 0);

    return(rc);
}
1592 
/*
 * is_hex:
 * Returns 1 if c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1600 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Replaces every "%XX" sequence (XX being two hexadecimal digits) by
 * the byte it encodes and copies all other bytes unchanged. No check is
 * made that the string is a URI. The result is never longer than the
 * input, so a caller supplied @target needs room for @len + 1 bytes.
 *
 * Returns @target, or a newly allocated string if @target is NULL.
 * NULL is returned only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *res, *dst;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    res = target;
    if (res == NULL) {
        res = (char *) xmlMallocAtomic(len + 1);
        if (res == NULL)
            return(NULL);
    }

    src = str;
    dst = res;
    while (len > 0) {
        /* An escape needs '%' followed by two hex digits within len */
        int isEscape = (len > 2) && (src[0] == '%') &&
                       (is_hex(src[1])) && (is_hex(src[2]));

        if (isEscape) {
            int val = 0;
            int i;

            /* Fold the two hex digits into one byte value */
            for (i = 1; i <= 2; i++) {
                char d = src[i];

                val *= 16;
                if ((d >= '0') && (d <= '9'))
                    val += (d - '0');
                else if ((d >= 'a') && (d <= 'f'))
                    val += (d - 'a') + 10;
                else if ((d >= 'A') && (d <= 'F'))
                    val += (d - 'A') + 10;
            }
            src += 3;
            len -= 3;
            /* Explicit sign change */
            *dst++ = (char) val;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(res);
}
1662 
1663 /**
1664  * xmlURIEscapeStr:
1665  * @str:  string to escape
1666  * @list: exception list string of chars not to escape
1667  *
1668  * This routine escapes a string to hex, ignoring unreserved characters
1669  * a-z, A-Z, 0-9, "-._~", a few sub-delims "!*'()", the gen-delim "@"
1670  * (why?) and the characters in the exception list.
1671  *
1672  * Returns a new escaped string or NULL in case of error.
1673  */
1674 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1675 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1676     xmlChar *ret, ch;
1677     xmlChar *temp;
1678     const xmlChar *in;
1679     int len, out;
1680 
1681     if (str == NULL)
1682 	return(NULL);
1683     if (str[0] == 0)
1684 	return(xmlStrdup(str));
1685     len = xmlStrlen(str);
1686 
1687     len += 20;
1688     ret = (xmlChar *) xmlMallocAtomic(len);
1689     if (ret == NULL)
1690 	return(NULL);
1691     in = (const xmlChar *) str;
1692     out = 0;
1693     while(*in != 0) {
1694 	if (len - out <= 3) {
1695             if (len > INT_MAX / 2)
1696                 return(NULL);
1697             temp = xmlRealloc(ret, len * 2);
1698 	    if (temp == NULL) {
1699 		xmlFree(ret);
1700 		return(NULL);
1701 	    }
1702 	    ret = temp;
1703             len *= 2;
1704 	}
1705 
1706 	ch = *in;
1707 
1708 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1709 	    unsigned char val;
1710 	    ret[out++] = '%';
1711 	    val = ch >> 4;
1712 	    if (val <= 9)
1713 		ret[out++] = '0' + val;
1714 	    else
1715 		ret[out++] = 'A' + val - 0xA;
1716 	    val = ch & 0xF;
1717 	    if (val <= 9)
1718 		ret[out++] = '0' + val;
1719 	    else
1720 		ret[out++] = 'A' + val - 0xA;
1721 	    in++;
1722 	} else {
1723 	    ret[out++] = *in++;
1724 	}
1725 
1726     }
1727     ret[out] = 0;
1728     return(ret);
1729 }
1730 
1731 /**
1732  * xmlURIEscape:
1733  * @str:  the string of the URI to escape
1734  *
1735  * Escaping routine, does not do validity checks !
1736  * It will try to escape the chars needing this, but this is heuristic
1737  * based it's impossible to be sure.
1738  *
1739  * Returns an copy of the string, but escaped
1740  *
1741  * 25 May 2001
1742  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1743  * according to RFC2396.
1744  *   - Carl Douglas
1745  */
1746 xmlChar *
xmlURIEscape(const xmlChar * str)1747 xmlURIEscape(const xmlChar * str)
1748 {
1749     xmlChar *ret, *segment = NULL;
1750     xmlURIPtr uri;
1751     int ret2;
1752 
1753     if (str == NULL)
1754         return (NULL);
1755 
1756     uri = xmlCreateURI();
1757     if (uri != NULL) {
1758 	/*
1759 	 * Allow escaping errors in the unescaped form
1760 	 */
1761         uri->cleanup = XML_URI_ALLOW_UNWISE;
1762         ret2 = xmlParseURIReference(uri, (const char *)str);
1763         if (ret2) {
1764             xmlFreeURI(uri);
1765             return (NULL);
1766         }
1767     }
1768 
1769     if (!uri)
1770         return NULL;
1771 
1772     ret = NULL;
1773 
1774 #define NULLCHK(p) if(!p) { \
1775          xmlFreeURI(uri); \
1776          xmlFree(ret); \
1777          return NULL; } \
1778 
1779     if (uri->scheme) {
1780         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1781         NULLCHK(segment)
1782         ret = xmlStrcat(ret, segment);
1783         ret = xmlStrcat(ret, BAD_CAST ":");
1784         xmlFree(segment);
1785     }
1786 
1787     if (uri->authority) {
1788         segment =
1789             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1790         NULLCHK(segment)
1791         ret = xmlStrcat(ret, BAD_CAST "//");
1792         ret = xmlStrcat(ret, segment);
1793         xmlFree(segment);
1794     }
1795 
1796     if (uri->user) {
1797         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1798         NULLCHK(segment)
1799         ret = xmlStrcat(ret,BAD_CAST "//");
1800         ret = xmlStrcat(ret, segment);
1801         ret = xmlStrcat(ret, BAD_CAST "@");
1802         xmlFree(segment);
1803     }
1804 
1805     if (uri->server) {
1806         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1807         NULLCHK(segment)
1808         if (uri->user == NULL)
1809             ret = xmlStrcat(ret, BAD_CAST "//");
1810         ret = xmlStrcat(ret, segment);
1811         xmlFree(segment);
1812     }
1813 
1814     if (uri->port > 0) {
1815         xmlChar port[11];
1816 
1817         snprintf((char *) port, 11, "%d", uri->port);
1818         ret = xmlStrcat(ret, BAD_CAST ":");
1819         ret = xmlStrcat(ret, port);
1820     }
1821 
1822     if (uri->path) {
1823         segment =
1824             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1825         NULLCHK(segment)
1826         ret = xmlStrcat(ret, segment);
1827         xmlFree(segment);
1828     }
1829 
1830     if (uri->query_raw) {
1831         ret = xmlStrcat(ret, BAD_CAST "?");
1832         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1833     }
1834     else if (uri->query) {
1835         segment =
1836             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1837         NULLCHK(segment)
1838         ret = xmlStrcat(ret, BAD_CAST "?");
1839         ret = xmlStrcat(ret, segment);
1840         xmlFree(segment);
1841     }
1842 
1843     if (uri->opaque) {
1844         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1845         NULLCHK(segment)
1846         ret = xmlStrcat(ret, segment);
1847         xmlFree(segment);
1848     }
1849 
1850     if (uri->fragment) {
1851         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1852         NULLCHK(segment)
1853         ret = xmlStrcat(ret, BAD_CAST "#");
1854         ret = xmlStrcat(ret, segment);
1855         xmlFree(segment);
1856     }
1857 
1858     xmlFreeURI(uri);
1859 #undef NULLCHK
1860 
1861     return (ret);
1862 }
1863 
1864 /************************************************************************
1865  *									*
1866  *			Public functions				*
1867  *									*
1868  ************************************************************************/
1869 
1870 static int
xmlIsAbsolutePath(const xmlChar * path)1871 xmlIsAbsolutePath(const xmlChar *path) {
1872     int c = path[0];
1873 
1874     if (xmlIsPathSeparator(c, 1))
1875         return(1);
1876 
1877 #ifdef _WIN32
1878     if ((((c >= 'A') && (c <= 'Z')) ||
1879          ((c >= 'a') && (c <= 'z'))) &&
1880         (path[1] == ':'))
1881         return(1);
1882 #endif
1883 
1884     return(0);
1885 }
1886 
1887 /**
1888  * xmlResolvePath:
1889  * @ref:  the filesystem path
1890  * @base:  the base value
1891  * @out:  pointer to result URI
1892  *
1893  * Resolves a filesystem path from a base path.
1894  *
1895  * Returns 0 on success, -1 if a memory allocation failed or an error
1896  * code if URI or base are invalid.
1897  */
1898 static int
xmlResolvePath(const xmlChar * escRef,const xmlChar * base,xmlChar ** out)1899 xmlResolvePath(const xmlChar *escRef, const xmlChar *base, xmlChar **out) {
1900     const xmlChar *fragment;
1901     xmlChar *tmp = NULL;
1902     xmlChar *ref = NULL;
1903     xmlChar *result = NULL;
1904     int ret = -1;
1905     int i;
1906 
1907     if (out == NULL)
1908         return(1);
1909     *out = NULL;
1910 
1911     if ((escRef == NULL) || (escRef[0] == 0)) {
1912         if ((base == NULL) || (base[0] == 0))
1913             return(1);
1914         ref = xmlStrdup(base);
1915         if (ref == NULL)
1916             goto err_memory;
1917         *out = ref;
1918         return(0);
1919     }
1920 
1921     /*
1922      * If a URI is resolved, we can assume it is a valid URI and not
1923      * a filesystem path. This means we have to unescape the part
1924      * before the fragment.
1925      */
1926     fragment = xmlStrchr(escRef, '#');
1927     if (fragment != NULL) {
1928         tmp = xmlStrndup(escRef, fragment - escRef);
1929         if (tmp == NULL)
1930             goto err_memory;
1931         escRef = tmp;
1932     }
1933 
1934     ref = (xmlChar *) xmlURIUnescapeString((char *) escRef, -1, NULL);
1935     if (ref == NULL)
1936         goto err_memory;
1937 
1938     if ((base == NULL) || (base[0] == 0))
1939         goto done;
1940 
1941     if (xmlIsAbsolutePath(ref))
1942         goto done;
1943 
1944     /*
1945      * Remove last segment from base
1946      */
1947     i = xmlStrlen(base);
1948     while ((i > 0) && !xmlIsPathSeparator(base[i-1], 1))
1949         i--;
1950 
1951     /*
1952      * Concatenate base and ref
1953      */
1954     if (i > 0) {
1955         int refLen = xmlStrlen(ref);
1956 
1957         result = xmlMalloc(i + refLen + 1);
1958         if (result == NULL)
1959             goto err_memory;
1960 
1961         memcpy(result, base, i);
1962         memcpy(result + i, ref, refLen + 1);
1963     }
1964 
1965     /*
1966      * Normalize
1967      */
1968     xmlNormalizePath((char *) result, 1);
1969 
1970 done:
1971     if (result == NULL) {
1972         result = ref;
1973         ref = NULL;
1974     }
1975 
1976     if (fragment != NULL) {
1977         result = xmlStrcat(result, fragment);
1978         if (result == NULL)
1979             goto err_memory;
1980     }
1981 
1982     *out = result;
1983     ret = 0;
1984 
1985 err_memory:
1986     xmlFree(tmp);
1987     xmlFree(ref);
1988     return(ret);
1989 }
1990 
/**
 * xmlBuildURISafe:
 * @URI:  the URI instance found in the document
 * @base:  the base value
 * @valPtr:  pointer to result URI
 *
 * Computes the final URI of the reference done by checking that
 * the given URI is valid, and building the final URI using the
 * base URI. This is processed according to section 5.2 of the
 * RFC 2396
 *
 * 5.2. Resolving Relative References to Absolute Form
 *
 * Returns 0 on success, -1 if a memory allocation failed or an error
 * code if URI or base are invalid.
 */
2007 int
xmlBuildURISafe(const xmlChar * URI,const xmlChar * base,xmlChar ** valPtr)2008 xmlBuildURISafe(const xmlChar *URI, const xmlChar *base, xmlChar **valPtr) {
2009     xmlChar *val = NULL;
2010     int ret, len, indx, cur, out;
2011     xmlURIPtr ref = NULL;
2012     xmlURIPtr bas = NULL;
2013     xmlURIPtr res = NULL;
2014 
2015     /*
2016      * 1) The URI reference is parsed into the potential four components and
2017      *    fragment identifier, as described in Section 4.3.
2018      *
2019      *    NOTE that a completely empty URI is treated by modern browsers
2020      *    as a reference to "." rather than as a synonym for the current
2021      *    URI.  Should we do that here?
2022      */
2023     if (URI == NULL)
2024         ret = 1;
2025     else if (URI[0] != 0)
2026         ret = xmlParseURISafe((const char *) URI, &ref);
2027     else
2028         ret = 0;
2029     if (ret != 0)
2030 	goto done;
2031     if ((ref != NULL) && (ref->scheme != NULL)) {
2032 	/*
2033 	 * The URI is absolute don't modify.
2034 	 */
2035 	val = xmlStrdup(URI);
2036         if (val == NULL)
2037             ret = -1;
2038 	goto done;
2039     }
2040 
2041     /*
2042      * If base has no scheme or authority, it is assumed to be a
2043      * filesystem path.
2044      */
2045     if (xmlStrstr(base, BAD_CAST "://") == NULL) {
2046         xmlFreeURI(ref);
2047         return(xmlResolvePath(URI, base, valPtr));
2048     }
2049 
2050     ret = xmlParseURISafe((const char *) base, &bas);
2051     if (ret < 0)
2052         goto done;
2053     if (ret != 0) {
2054 	if (ref) {
2055             ret = 0;
2056 	    val = xmlSaveUri(ref);
2057             if (val == NULL)
2058                 ret = -1;
2059         }
2060 	goto done;
2061     }
2062     if (ref == NULL) {
2063 	/*
2064 	 * the base fragment must be ignored
2065 	 */
2066 	if (bas->fragment != NULL) {
2067 	    xmlFree(bas->fragment);
2068 	    bas->fragment = NULL;
2069 	}
2070 	val = xmlSaveUri(bas);
2071         if (val == NULL)
2072             ret = -1;
2073 	goto done;
2074     }
2075 
2076     /*
2077      * 2) If the path component is empty and the scheme, authority, and
2078      *    query components are undefined, then it is a reference to the
2079      *    current document and we are done.  Otherwise, the reference URI's
2080      *    query and fragment components are defined as found (or not found)
2081      *    within the URI reference and not inherited from the base URI.
2082      *
2083      *    NOTE that in modern browsers, the parsing differs from the above
2084      *    in the following aspect:  the query component is allowed to be
2085      *    defined while still treating this as a reference to the current
2086      *    document.
2087      */
2088     ret = -1;
2089     res = xmlCreateURI();
2090     if (res == NULL)
2091 	goto done;
2092     if ((ref->scheme == NULL) && (ref->path == NULL) &&
2093 	((ref->authority == NULL) && (ref->server == NULL) &&
2094          (ref->port == PORT_EMPTY))) {
2095 	if (bas->scheme != NULL) {
2096 	    res->scheme = xmlMemStrdup(bas->scheme);
2097             if (res->scheme == NULL)
2098                 goto done;
2099         }
2100 	if (bas->authority != NULL) {
2101 	    res->authority = xmlMemStrdup(bas->authority);
2102             if (res->authority == NULL)
2103                 goto done;
2104         } else {
2105 	    if (bas->server != NULL) {
2106 		res->server = xmlMemStrdup(bas->server);
2107                 if (res->server == NULL)
2108                     goto done;
2109             }
2110 	    if (bas->user != NULL) {
2111 		res->user = xmlMemStrdup(bas->user);
2112                 if (res->user == NULL)
2113                     goto done;
2114             }
2115 	    res->port = bas->port;
2116 	}
2117 	if (bas->path != NULL) {
2118 	    res->path = xmlMemStrdup(bas->path);
2119             if (res->path == NULL)
2120                 goto done;
2121         }
2122 	if (ref->query_raw != NULL) {
2123 	    res->query_raw = xmlMemStrdup (ref->query_raw);
2124             if (res->query_raw == NULL)
2125                 goto done;
2126         } else if (ref->query != NULL) {
2127 	    res->query = xmlMemStrdup(ref->query);
2128             if (res->query == NULL)
2129                 goto done;
2130         } else if (bas->query_raw != NULL) {
2131 	    res->query_raw = xmlMemStrdup(bas->query_raw);
2132             if (res->query_raw == NULL)
2133                 goto done;
2134         } else if (bas->query != NULL) {
2135 	    res->query = xmlMemStrdup(bas->query);
2136             if (res->query == NULL)
2137                 goto done;
2138         }
2139 	if (ref->fragment != NULL) {
2140 	    res->fragment = xmlMemStrdup(ref->fragment);
2141             if (res->fragment == NULL)
2142                 goto done;
2143         }
2144 	goto step_7;
2145     }
2146 
2147     /*
2148      * 3) If the scheme component is defined, indicating that the reference
2149      *    starts with a scheme name, then the reference is interpreted as an
2150      *    absolute URI and we are done.  Otherwise, the reference URI's
2151      *    scheme is inherited from the base URI's scheme component.
2152      */
2153     if (ref->scheme != NULL) {
2154 	val = xmlSaveUri(ref);
2155         if (val != NULL)
2156             ret = 0;
2157 	goto done;
2158     }
2159     if (bas->scheme != NULL) {
2160 	res->scheme = xmlMemStrdup(bas->scheme);
2161         if (res->scheme == NULL)
2162             goto done;
2163     }
2164 
2165     if (ref->query_raw != NULL) {
2166 	res->query_raw = xmlMemStrdup(ref->query_raw);
2167         if (res->query_raw == NULL)
2168             goto done;
2169     } else if (ref->query != NULL) {
2170 	res->query = xmlMemStrdup(ref->query);
2171         if (res->query == NULL)
2172             goto done;
2173     }
2174     if (ref->fragment != NULL) {
2175 	res->fragment = xmlMemStrdup(ref->fragment);
2176         if (res->fragment == NULL)
2177             goto done;
2178     }
2179 
2180     /*
2181      * 4) If the authority component is defined, then the reference is a
2182      *    network-path and we skip to step 7.  Otherwise, the reference
2183      *    URI's authority is inherited from the base URI's authority
2184      *    component, which will also be undefined if the URI scheme does not
2185      *    use an authority component.
2186      */
2187     if ((ref->authority != NULL) || (ref->server != NULL) ||
2188          (ref->port != PORT_EMPTY)) {
2189 	if (ref->authority != NULL) {
2190 	    res->authority = xmlMemStrdup(ref->authority);
2191             if (res->authority == NULL)
2192                 goto done;
2193         } else {
2194             if (ref->server != NULL) {
2195                 res->server = xmlMemStrdup(ref->server);
2196                 if (res->server == NULL)
2197                     goto done;
2198             }
2199 	    if (ref->user != NULL) {
2200 		res->user = xmlMemStrdup(ref->user);
2201                 if (res->user == NULL)
2202                     goto done;
2203             }
2204             res->port = ref->port;
2205 	}
2206 	if (ref->path != NULL) {
2207 	    res->path = xmlMemStrdup(ref->path);
2208             if (res->path == NULL)
2209                 goto done;
2210         }
2211 	goto step_7;
2212     }
2213     if (bas->authority != NULL) {
2214 	res->authority = xmlMemStrdup(bas->authority);
2215         if (res->authority == NULL)
2216             goto done;
2217     } else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2218 	if (bas->server != NULL) {
2219 	    res->server = xmlMemStrdup(bas->server);
2220             if (res->server == NULL)
2221                 goto done;
2222         }
2223 	if (bas->user != NULL) {
2224 	    res->user = xmlMemStrdup(bas->user);
2225             if (res->user == NULL)
2226                 goto done;
2227         }
2228 	res->port = bas->port;
2229     }
2230 
2231     /*
2232      * 5) If the path component begins with a slash character ("/"), then
2233      *    the reference is an absolute-path and we skip to step 7.
2234      */
2235     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2236 	res->path = xmlMemStrdup(ref->path);
2237         if (res->path == NULL)
2238             goto done;
2239 	goto step_7;
2240     }
2241 
2242 
2243     /*
2244      * 6) If this step is reached, then we are resolving a relative-path
2245      *    reference.  The relative path needs to be merged with the base
2246      *    URI's path.  Although there are many ways to do this, we will
2247      *    describe a simple method using a separate string buffer.
2248      *
2249      * Allocate a buffer large enough for the result string.
2250      */
2251     len = 2; /* extra / and 0 */
2252     if (ref->path != NULL)
2253 	len += strlen(ref->path);
2254     if (bas->path != NULL)
2255 	len += strlen(bas->path);
2256     res->path = (char *) xmlMallocAtomic(len);
2257     if (res->path == NULL)
2258 	goto done;
2259     res->path[0] = 0;
2260 
2261     /*
2262      * a) All but the last segment of the base URI's path component is
2263      *    copied to the buffer.  In other words, any characters after the
2264      *    last (right-most) slash character, if any, are excluded.
2265      */
2266     cur = 0;
2267     out = 0;
2268     if (bas->path != NULL) {
2269 	while (bas->path[cur] != 0) {
2270 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2271 		cur++;
2272 	    if (bas->path[cur] == 0)
2273 		break;
2274 
2275 	    cur++;
2276 	    while (out < cur) {
2277 		res->path[out] = bas->path[out];
2278 		out++;
2279 	    }
2280 	}
2281     }
2282     res->path[out] = 0;
2283 
2284     /*
2285      * b) The reference's path component is appended to the buffer
2286      *    string.
2287      */
2288     if (ref->path != NULL && ref->path[0] != 0) {
2289 	indx = 0;
2290 	/*
2291 	 * Ensure the path includes a '/'
2292 	 */
2293 	if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2294 	    res->path[out++] = '/';
2295 	while (ref->path[indx] != 0) {
2296 	    res->path[out++] = ref->path[indx++];
2297 	}
2298     }
2299     res->path[out] = 0;
2300 
2301     /*
2302      * Steps c) to h) are really path normalization steps
2303      */
2304     xmlNormalizeURIPath(res->path);
2305 
2306 step_7:
2307 
2308     /*
2309      * 7) The resulting URI components, including any inherited from the
2310      *    base URI, are recombined to give the absolute form of the URI
2311      *    reference.
2312      */
2313     val = xmlSaveUri(res);
2314     if (val != NULL)
2315         ret = 0;
2316 
2317 done:
2318     if (ref != NULL)
2319 	xmlFreeURI(ref);
2320     if (bas != NULL)
2321 	xmlFreeURI(bas);
2322     if (res != NULL)
2323 	xmlFreeURI(res);
2324     *valPtr = val;
2325     return(ret);
2326 }
2327 
2328 /**
2329  * xmlBuildURI:
2330  * @URI:  the URI instance found in the document
2331  * @base:  the base value
2332  *
2333  * Computes he final URI of the reference done by checking that
2334  * the given URI is valid, and building the final URI using the
2335  * base URI. This is processed according to section 5.2 of the
2336  * RFC 2396
2337  *
2338  * 5.2. Resolving Relative References to Absolute Form
2339  *
2340  * Returns a new URI string (to be freed by the caller) or NULL in case
2341  *         of error.
2342  */
2343 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)2344 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
2345     xmlChar *out;
2346 
2347     xmlBuildURISafe(URI, base, &out);
2348     return(out);
2349 }
2350 
2351 /**
2352  * xmlBuildRelativeURISafe:
2353  * @URI:  the URI reference under consideration
2354  * @base:  the base value
2355  * @valPtr:  pointer to result URI
2356  *
2357  * Expresses the URI of the reference in terms relative to the
2358  * base.  Some examples of this operation include:
2359  *     base = "http://site1.com/docs/book1.html"
2360  *        URI input                        URI returned
2361  *     docs/pic1.gif                    pic1.gif
2362  *     docs/img/pic1.gif                img/pic1.gif
2363  *     img/pic1.gif                     ../img/pic1.gif
2364  *     http://site1.com/docs/pic1.gif   pic1.gif
2365  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2366  *
2367  *     base = "docs/book1.html"
2368  *        URI input                        URI returned
2369  *     docs/pic1.gif                    pic1.gif
2370  *     docs/img/pic1.gif                img/pic1.gif
2371  *     img/pic1.gif                     ../img/pic1.gif
2372  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2373  *
2374  *
2375  * Note: if the URI reference is really weird or complicated, it may be
2376  *       worthwhile to first convert it into a "nice" one by calling
2377  *       xmlBuildURI (using 'base') before calling this routine,
2378  *       since this routine (for reasonable efficiency) assumes URI has
2379  *       already been through some validation.
2380  *
2381  * Returns 0 on success, -1 if a memory allocation failed or an error
2382  * code if URI or base are invalid.
2383  */
2384 int
xmlBuildRelativeURISafe(const xmlChar * URI,const xmlChar * base,xmlChar ** valPtr)2385 xmlBuildRelativeURISafe(const xmlChar * URI, const xmlChar * base,
2386                         xmlChar **valPtr)
2387 {
2388     xmlChar *val = NULL;
2389     int ret = 0;
2390     int ix;
2391     int nbslash = 0;
2392     int len;
2393     xmlURIPtr ref = NULL;
2394     xmlURIPtr bas = NULL;
2395     xmlChar *bptr, *uptr, *vptr;
2396     int remove_path = 0;
2397 
2398     if (valPtr == NULL)
2399         return(1);
2400     *valPtr = NULL;
2401     if ((URI == NULL) || (*URI == 0))
2402 	return(1);
2403 
2404     /*
2405      * First parse URI into a standard form
2406      */
2407     ref = xmlCreateURI ();
2408     if (ref == NULL) {
2409         ret = -1;
2410 	goto done;
2411     }
2412     /* If URI not already in "relative" form */
2413     if (URI[0] != '.') {
2414 	ret = xmlParseURIReference (ref, (const char *) URI);
2415 	if (ret != 0)
2416 	    goto done;		/* Error in URI, return NULL */
2417     } else {
2418 	ref->path = (char *)xmlStrdup(URI);
2419         if (ref->path == NULL) {
2420             ret = -1;
2421             goto done;
2422         }
2423     }
2424 
2425     /*
2426      * Next parse base into the same standard form
2427      */
2428     if ((base == NULL) || (*base == 0)) {
2429 	val = xmlStrdup (URI);
2430         if (val == NULL)
2431             ret = -1;
2432 	goto done;
2433     }
2434     bas = xmlCreateURI ();
2435     if (bas == NULL) {
2436         ret = -1;
2437 	goto done;
2438     }
2439     if (base[0] != '.') {
2440 	ret = xmlParseURIReference (bas, (const char *) base);
2441 	if (ret != 0)
2442 	    goto done;		/* Error in base, return NULL */
2443     } else {
2444 	bas->path = (char *)xmlStrdup(base);
2445         if (bas->path == NULL) {
2446             ret = -1;
2447             goto done;
2448         }
2449     }
2450 
2451     /*
2452      * If the scheme / server on the URI differs from the base,
2453      * just return the URI
2454      */
2455     if ((ref->scheme != NULL) &&
2456 	((bas->scheme == NULL) ||
2457 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2458 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2459          (bas->port != ref->port))) {
2460 	val = xmlStrdup (URI);
2461         if (val == NULL)
2462             ret = -1;
2463 	goto done;
2464     }
2465     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2466 	val = xmlStrdup(BAD_CAST "");
2467         if (val == NULL)
2468             ret = -1;
2469 	goto done;
2470     }
2471     if (bas->path == NULL) {
2472 	val = xmlStrdup((xmlChar *)ref->path);
2473         if (val == NULL)
2474             ret = -1;
2475 	goto done;
2476     }
2477     if (ref->path == NULL) {
2478         ref->path = (char *) "/";
2479 	remove_path = 1;
2480     }
2481 
2482     /*
2483      * At this point (at last!) we can compare the two paths
2484      *
2485      * First we take care of the special case where either of the
2486      * two path components may be missing (bug 316224)
2487      */
2488     bptr = (xmlChar *)bas->path;
2489     {
2490         xmlChar *rptr = (xmlChar *) ref->path;
2491         int pos = 0;
2492 
2493         /*
2494          * Next we compare the two strings and find where they first differ
2495          */
2496 	if ((*rptr == '.') && (rptr[1] == '/'))
2497             rptr += 2;
2498 	if ((*bptr == '.') && (bptr[1] == '/'))
2499             bptr += 2;
2500 	else if ((*bptr == '/') && (*rptr != '/'))
2501 	    bptr++;
2502 	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2503 	    pos++;
2504 
2505 	if (bptr[pos] == rptr[pos]) {
2506 	    val = xmlStrdup(BAD_CAST "");
2507             if (val == NULL)
2508                 ret = -1;
2509 	    goto done;		/* (I can't imagine why anyone would do this) */
2510 	}
2511 
2512 	/*
2513 	 * In URI, "back up" to the last '/' encountered.  This will be the
2514 	 * beginning of the "unique" suffix of URI
2515 	 */
2516 	ix = pos;
2517 	for (; ix > 0; ix--) {
2518 	    if (rptr[ix - 1] == '/')
2519 		break;
2520 	}
2521 	uptr = (xmlChar *)&rptr[ix];
2522 
2523 	/*
2524 	 * In base, count the number of '/' from the differing point
2525 	 */
2526 	for (; bptr[ix] != 0; ix++) {
2527 	    if (bptr[ix] == '/')
2528 		nbslash++;
2529 	}
2530 
2531 	/*
2532 	 * e.g: URI="foo/" base="foo/bar" -> "./"
2533 	 */
2534 	if (nbslash == 0 && !uptr[0]) {
2535 	    val = xmlStrdup(BAD_CAST "./");
2536             if (val == NULL)
2537                 ret = -1;
2538 	    goto done;
2539 	}
2540 
2541 	len = xmlStrlen (uptr) + 1;
2542     }
2543 
2544     if (nbslash == 0) {
2545 	if (uptr != NULL) {
2546 	    /* exception characters from xmlSaveUri */
2547 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2548             if (val == NULL)
2549                 ret = -1;
2550         }
2551 	goto done;
2552     }
2553 
2554     /*
2555      * Allocate just enough space for the returned string -
2556      * length of the remainder of the URI, plus enough space
2557      * for the "../" groups, plus one for the terminator
2558      */
2559     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2560     if (val == NULL) {
2561         ret = -1;
2562 	goto done;
2563     }
2564     vptr = val;
2565     /*
2566      * Put in as many "../" as needed
2567      */
2568     for (; nbslash>0; nbslash--) {
2569 	*vptr++ = '.';
2570 	*vptr++ = '.';
2571 	*vptr++ = '/';
2572     }
2573     /*
2574      * Finish up with the end of the URI
2575      */
2576     if (uptr != NULL) {
2577         if ((vptr > val) && (len > 0) &&
2578 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2579 	    memcpy (vptr, uptr + 1, len - 1);
2580 	    vptr[len - 2] = 0;
2581 	} else {
2582 	    memcpy (vptr, uptr, len);
2583 	    vptr[len - 1] = 0;
2584 	}
2585     } else {
2586 	vptr[len - 1] = 0;
2587     }
2588 
2589     /* escape the freshly-built path */
2590     vptr = val;
2591 	/* exception characters from xmlSaveUri */
2592     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2593     if (val == NULL)
2594         ret = -1;
2595     else
2596         ret = 0;
2597     xmlFree(vptr);
2598 
2599 done:
2600     /*
2601      * Free the working variables
2602      */
2603     if (remove_path != 0)
2604         ref->path = NULL;
2605     if (ref != NULL)
2606 	xmlFreeURI (ref);
2607     if (bas != NULL)
2608 	xmlFreeURI (bas);
2609     if (ret != 0) {
2610         xmlFree(val);
2611         val = NULL;
2612     }
2613 
2614     *valPtr = val;
2615     return(ret);
2616 }
2617 
2618 /*
2619  * xmlBuildRelativeURI:
2620  * @URI:  the URI reference under consideration
2621  * @base:  the base value
2622  *
2623  * See xmlBuildRelativeURISafe.
2624  *
2625  * Returns a new URI string (to be freed by the caller) or NULL in case
2626  * error.
2627  */
2628 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2629 xmlBuildRelativeURI(const xmlChar * URI, const xmlChar * base)
2630 {
2631     xmlChar *val;
2632 
2633     xmlBuildRelativeURISafe(URI, base, &val);
2634     return(val);
2635 }
2636 
2637 /**
2638  * xmlCanonicPath:
2639  * @path:  the resource locator in a filesystem notation
2640  *
2641  * Prepares a path.
2642  *
2643  * If the path contains the substring "://", it is considered a
2644  * Legacy Extended IRI. Characters which aren't allowed in URIs are
2645  * escaped.
2646  *
2647  * Otherwise, the path is considered a filesystem path which is
2648  * copied without modification.
2649  *
2650  * The caller is responsible for freeing the memory occupied
2651  * by the returned string. If there is insufficient memory available, or the
2652  * argument is NULL, the function returns NULL.
2653  */
2654 xmlChar *
xmlCanonicPath(const xmlChar * path)2655 xmlCanonicPath(const xmlChar *path)
2656 {
2657     xmlChar *ret;
2658 
2659     if (path == NULL)
2660 	return(NULL);
2661 
2662     /* Check if this is an "absolute uri" */
2663     if (xmlStrstr(path, BAD_CAST "://") != NULL) {
2664 	/*
2665          * Escape all characters except reserved, unreserved and the
2666          * percent sign.
2667          *
2668          * xmlURIEscapeStr already keeps unreserved characters, so we
2669          * pass gen-delims, sub-delims and "%" to ignore.
2670          */
2671         ret = xmlURIEscapeStr(path, BAD_CAST ":/?#[]@!$&()*+,;='%");
2672     } else {
2673         ret = xmlStrdup((const xmlChar *) path);
2674     }
2675 
2676     return(ret);
2677 }
2678 
2679 /**
2680  * xmlPathToURI:
2681  * @path:  the resource locator in a filesystem notation
2682  *
2683  * Constructs an URI expressing the existing path
2684  *
2685  * Returns a new URI, or a duplicate of the path parameter if the
2686  * construction fails. The caller is responsible for freeing the memory
2687  * occupied by the returned string. If there is insufficient memory available,
2688  * or the argument is NULL, the function returns NULL.
2689  */
2690 xmlChar *
xmlPathToURI(const xmlChar * path)2691 xmlPathToURI(const xmlChar *path)
2692 {
2693     return(xmlCanonicPath(path));
2694 }
2695