xref: /aosp_15_r20/external/cronet/third_party/libxml/src/xmlstring.c (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 /*
2  * string.c : an XML string utilities module
3  *
4  * This module provides various utility functions for manipulating
5  * the xmlChar* type. All functions named xmlStr* have been moved here
6  * from the parser.c file (their original home).
7  *
8  * See Copyright for the status of this software.
9  *
10  * UTF8 string routines from:
11  * William Brack <[email protected]>
12  *
13  * [email protected]
14  */
15 
16 #define IN_LIBXML
17 #include "libxml.h"
18 
19 #include <stdlib.h>
20 #include <string.h>
21 #include <limits.h>
22 #include <libxml/xmlmemory.h>
23 #include <libxml/parserInternals.h>
24 #include <libxml/xmlstring.h>
25 
26 #include "private/parser.h"
27 #include "private/string.h"
28 
/*
 * Fallback for toolchains whose headers do not define va_copy: use the
 * __va_copy spelling when it is available, otherwise copy the va_list
 * object with memcpy.
 *
 * NOTE(review): the memcpy form passes dest/src without taking their
 * address, which presumably relies on va_list being an array type on
 * the platforms that reach this branch - confirm.
 */
#ifndef va_copy
  #ifdef __va_copy
    #define va_copy(dest, src) __va_copy(dest, src)
  #else
    #define va_copy(dest, src) memcpy(dest, src, sizeof(va_list))
  #endif
#endif
36 
37 /************************************************************************
38  *                                                                      *
39  *                Commodity functions to handle xmlChars                *
40  *                                                                      *
41  ************************************************************************/
42 
43 /**
44  * xmlStrndup:
45  * @cur:  the input xmlChar *
46  * @len:  the len of @cur
47  *
48  * a strndup for array of xmlChar's
49  *
50  * Returns a new xmlChar * or NULL
51  */
52 xmlChar *
xmlStrndup(const xmlChar * cur,int len)53 xmlStrndup(const xmlChar *cur, int len) {
54     xmlChar *ret;
55 
56     if ((cur == NULL) || (len < 0)) return(NULL);
57     ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);
58     if (ret == NULL) {
59         return(NULL);
60     }
61     memcpy(ret, cur, len);
62     ret[len] = 0;
63     return(ret);
64 }
65 
66 /**
67  * xmlStrdup:
68  * @cur:  the input xmlChar *
69  *
70  * a strdup for array of xmlChar's. Since they are supposed to be
71  * encoded in UTF-8 or an encoding with 8bit based chars, we assume
72  * a termination mark of '0'.
73  *
74  * Returns a new xmlChar * or NULL
75  */
76 xmlChar *
xmlStrdup(const xmlChar * cur)77 xmlStrdup(const xmlChar *cur) {
78     const xmlChar *p = cur;
79 
80     if (cur == NULL) return(NULL);
81     while (*p != 0) p++; /* non input consuming */
82     return(xmlStrndup(cur, p - cur));
83 }
84 
85 /**
86  * xmlCharStrndup:
87  * @cur:  the input char *
88  * @len:  the len of @cur
89  *
90  * a strndup for char's to xmlChar's
91  *
92  * Returns a new xmlChar * or NULL
93  */
94 
95 xmlChar *
xmlCharStrndup(const char * cur,int len)96 xmlCharStrndup(const char *cur, int len) {
97     int i;
98     xmlChar *ret;
99 
100     if ((cur == NULL) || (len < 0)) return(NULL);
101     ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);
102     if (ret == NULL) {
103         return(NULL);
104     }
105     for (i = 0;i < len;i++) {
106         /* Explicit sign change */
107         ret[i] = (xmlChar) cur[i];
108         if (ret[i] == 0) return(ret);
109     }
110     ret[len] = 0;
111     return(ret);
112 }
113 
114 /**
115  * xmlCharStrdup:
116  * @cur:  the input char *
117  *
118  * a strdup for char's to xmlChar's
119  *
120  * Returns a new xmlChar * or NULL
121  */
122 
123 xmlChar *
xmlCharStrdup(const char * cur)124 xmlCharStrdup(const char *cur) {
125     const char *p = cur;
126 
127     if (cur == NULL) return(NULL);
128     while (*p != '\0') p++; /* non input consuming */
129     return(xmlCharStrndup(cur, p - cur));
130 }
131 
132 /**
133  * xmlStrcmp:
134  * @str1:  the first xmlChar *
135  * @str2:  the second xmlChar *
136  *
137  * a strcmp for xmlChar's
138  *
139  * Returns the integer result of the comparison
140  */
141 
142 int
xmlStrcmp(const xmlChar * str1,const xmlChar * str2)143 xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
144     if (str1 == str2) return(0);
145     if (str1 == NULL) return(-1);
146     if (str2 == NULL) return(1);
147 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
148     return(strcmp((const char *)str1, (const char *)str2));
149 #else
150     do {
151         int tmp = *str1++ - *str2;
152         if (tmp != 0) return(tmp);
153     } while (*str2++ != 0);
154     return 0;
155 #endif
156 }
157 
158 /**
159  * xmlStrEqual:
160  * @str1:  the first xmlChar *
161  * @str2:  the second xmlChar *
162  *
163  * Check if both strings are equal of have same content.
164  * Should be a bit more readable and faster than xmlStrcmp()
165  *
166  * Returns 1 if they are equal, 0 if they are different
167  */
168 
169 int
xmlStrEqual(const xmlChar * str1,const xmlChar * str2)170 xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
171     if (str1 == str2) return(1);
172     if (str1 == NULL) return(0);
173     if (str2 == NULL) return(0);
174 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
175     return(strcmp((const char *)str1, (const char *)str2) == 0);
176 #else
177     do {
178         if (*str1++ != *str2) return(0);
179     } while (*str2++);
180     return(1);
181 #endif
182 }
183 
184 /**
185  * xmlStrQEqual:
186  * @pref:  the prefix of the QName
187  * @name:  the localname of the QName
188  * @str:  the second xmlChar *
189  *
190  * Check if a QName is Equal to a given string
191  *
192  * Returns 1 if they are equal, 0 if they are different
193  */
194 
195 int
xmlStrQEqual(const xmlChar * pref,const xmlChar * name,const xmlChar * str)196 xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
197     if (pref == NULL) return(xmlStrEqual(name, str));
198     if (name == NULL) return(0);
199     if (str == NULL) return(0);
200 
201     do {
202         if (*pref++ != *str) return(0);
203     } while ((*str++) && (*pref));
204     if (*str++ != ':') return(0);
205     do {
206         if (*name++ != *str) return(0);
207     } while (*str++);
208     return(1);
209 }
210 
211 /**
212  * xmlStrncmp:
213  * @str1:  the first xmlChar *
214  * @str2:  the second xmlChar *
215  * @len:  the max comparison length
216  *
217  * a strncmp for xmlChar's
218  *
219  * Returns the integer result of the comparison
220  */
221 
222 int
xmlStrncmp(const xmlChar * str1,const xmlChar * str2,int len)223 xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
224     if (len <= 0) return(0);
225     if (str1 == str2) return(0);
226     if (str1 == NULL) return(-1);
227     if (str2 == NULL) return(1);
228 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
229     return(strncmp((const char *)str1, (const char *)str2, len));
230 #else
231     do {
232         int tmp = *str1++ - *str2;
233         if (tmp != 0 || --len == 0) return(tmp);
234     } while (*str2++ != 0);
235     return 0;
236 #endif
237 }
238 
239 static const xmlChar casemap[256] = {
240     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
241     0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
242     0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
243     0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
244     0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
245     0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
246     0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
247     0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
248     0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
249     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
250     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
251     0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
252     0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
253     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
254     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
255     0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
256     0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
257     0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
258     0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
259     0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
260     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
261     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
262     0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
263     0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
264     0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
265     0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
266     0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
267     0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
268     0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
269     0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
270     0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
271     0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
272 };
273 
274 /**
275  * xmlStrcasecmp:
276  * @str1:  the first xmlChar *
277  * @str2:  the second xmlChar *
278  *
279  * a strcasecmp for xmlChar's
280  *
281  * Returns the integer result of the comparison
282  */
283 
284 int
xmlStrcasecmp(const xmlChar * str1,const xmlChar * str2)285 xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
286     register int tmp;
287 
288     if (str1 == str2) return(0);
289     if (str1 == NULL) return(-1);
290     if (str2 == NULL) return(1);
291     do {
292         tmp = casemap[*str1++] - casemap[*str2];
293         if (tmp != 0) return(tmp);
294     } while (*str2++ != 0);
295     return 0;
296 }
297 
298 /**
299  * xmlStrncasecmp:
300  * @str1:  the first xmlChar *
301  * @str2:  the second xmlChar *
302  * @len:  the max comparison length
303  *
304  * a strncasecmp for xmlChar's
305  *
306  * Returns the integer result of the comparison
307  */
308 
309 int
xmlStrncasecmp(const xmlChar * str1,const xmlChar * str2,int len)310 xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
311     register int tmp;
312 
313     if (len <= 0) return(0);
314     if (str1 == str2) return(0);
315     if (str1 == NULL) return(-1);
316     if (str2 == NULL) return(1);
317     do {
318         tmp = casemap[*str1++] - casemap[*str2];
319         if (tmp != 0 || --len == 0) return(tmp);
320     } while (*str2++ != 0);
321     return 0;
322 }
323 
324 /**
325  * xmlStrchr:
326  * @str:  the xmlChar * array
327  * @val:  the xmlChar to search
328  *
329  * a strchr for xmlChar's
330  *
331  * Returns the xmlChar * for the first occurrence or NULL.
332  */
333 
334 const xmlChar *
xmlStrchr(const xmlChar * str,xmlChar val)335 xmlStrchr(const xmlChar *str, xmlChar val) {
336     if (str == NULL) return(NULL);
337     while (*str != 0) { /* non input consuming */
338         if (*str == val) return((xmlChar *) str);
339         str++;
340     }
341     return(NULL);
342 }
343 
344 /**
345  * xmlStrstr:
346  * @str:  the xmlChar * array (haystack)
347  * @val:  the xmlChar to search (needle)
348  *
349  * a strstr for xmlChar's
350  *
351  * Returns the xmlChar * for the first occurrence or NULL.
352  */
353 
354 const xmlChar *
xmlStrstr(const xmlChar * str,const xmlChar * val)355 xmlStrstr(const xmlChar *str, const xmlChar *val) {
356     int n;
357 
358     if (str == NULL) return(NULL);
359     if (val == NULL) return(NULL);
360     n = xmlStrlen(val);
361 
362     if (n == 0) return(str);
363     while (*str != 0) { /* non input consuming */
364         if (*str == *val) {
365             if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
366         }
367         str++;
368     }
369     return(NULL);
370 }
371 
372 /**
373  * xmlStrcasestr:
374  * @str:  the xmlChar * array (haystack)
375  * @val:  the xmlChar to search (needle)
376  *
377  * a case-ignoring strstr for xmlChar's
378  *
379  * Returns the xmlChar * for the first occurrence or NULL.
380  */
381 
382 const xmlChar *
xmlStrcasestr(const xmlChar * str,const xmlChar * val)383 xmlStrcasestr(const xmlChar *str, const xmlChar *val) {
384     int n;
385 
386     if (str == NULL) return(NULL);
387     if (val == NULL) return(NULL);
388     n = xmlStrlen(val);
389 
390     if (n == 0) return(str);
391     while (*str != 0) { /* non input consuming */
392         if (casemap[*str] == casemap[*val])
393             if (!xmlStrncasecmp(str, val, n)) return(str);
394         str++;
395     }
396     return(NULL);
397 }
398 
399 /**
400  * xmlStrsub:
401  * @str:  the xmlChar * array (haystack)
402  * @start:  the index of the first char (zero based)
403  * @len:  the length of the substring
404  *
405  * Extract a substring of a given string
406  *
407  * Returns the xmlChar * for the first occurrence or NULL.
408  */
409 
410 xmlChar *
xmlStrsub(const xmlChar * str,int start,int len)411 xmlStrsub(const xmlChar *str, int start, int len) {
412     int i;
413 
414     if (str == NULL) return(NULL);
415     if (start < 0) return(NULL);
416     if (len < 0) return(NULL);
417 
418     for (i = 0;i < start;i++) {
419         if (*str == 0) return(NULL);
420         str++;
421     }
422     if (*str == 0) return(NULL);
423     return(xmlStrndup(str, len));
424 }
425 
426 /**
427  * xmlStrlen:
428  * @str:  the xmlChar * array
429  *
430  * length of a xmlChar's string
431  *
432  * Returns the number of xmlChar contained in the ARRAY.
433  */
434 
435 int
xmlStrlen(const xmlChar * str)436 xmlStrlen(const xmlChar *str) {
437     size_t len = str ? strlen((const char *)str) : 0;
438     return(len > INT_MAX ? 0 : len);
439 }
440 
441 /**
442  * xmlStrncat:
443  * @cur:  the original xmlChar * array
444  * @add:  the xmlChar * array added
445  * @len:  the length of @add
446  *
447  * a strncat for array of xmlChar's, it will extend @cur with the len
448  * first bytes of @add. Note that if @len < 0 then this is an API error
449  * and NULL will be returned.
450  *
451  * Returns a new xmlChar *, the original @cur is reallocated and should
452  * not be freed.
453  */
454 
455 xmlChar *
xmlStrncat(xmlChar * cur,const xmlChar * add,int len)456 xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
457     int size;
458     xmlChar *ret;
459 
460     if ((add == NULL) || (len == 0))
461         return(cur);
462     if (len < 0)
463 	return(NULL);
464     if (cur == NULL)
465         return(xmlStrndup(add, len));
466 
467     size = xmlStrlen(cur);
468     if ((size < 0) || (size > INT_MAX - len))
469         return(NULL);
470     ret = (xmlChar *) xmlRealloc(cur, (size_t) size + len + 1);
471     if (ret == NULL) {
472         xmlFree(cur);
473         return(NULL);
474     }
475     memcpy(&ret[size], add, len);
476     ret[size + len] = 0;
477     return(ret);
478 }
479 
480 /**
481  * xmlStrncatNew:
482  * @str1:  first xmlChar string
483  * @str2:  second xmlChar string
484  * @len:  the len of @str2 or < 0
485  *
486  * same as xmlStrncat, but creates a new string.  The original
487  * two strings are not freed. If @len is < 0 then the length
488  * will be calculated automatically.
489  *
490  * Returns a new xmlChar * or NULL
491  */
492 xmlChar *
xmlStrncatNew(const xmlChar * str1,const xmlChar * str2,int len)493 xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
494     int size;
495     xmlChar *ret;
496 
497     if (len < 0) {
498         len = xmlStrlen(str2);
499         if (len < 0)
500             return(NULL);
501     }
502     if ((str2 == NULL) || (len == 0))
503         return(xmlStrdup(str1));
504     if (str1 == NULL)
505         return(xmlStrndup(str2, len));
506 
507     size = xmlStrlen(str1);
508     if ((size < 0) || (size > INT_MAX - len))
509         return(NULL);
510     ret = (xmlChar *) xmlMalloc((size_t) size + len + 1);
511     if (ret == NULL)
512         return(NULL);
513     memcpy(ret, str1, size);
514     memcpy(&ret[size], str2, len);
515     ret[size + len] = 0;
516     return(ret);
517 }
518 
519 /**
520  * xmlStrcat:
521  * @cur:  the original xmlChar * array
522  * @add:  the xmlChar * array added
523  *
524  * a strcat for array of xmlChar's. Since they are supposed to be
525  * encoded in UTF-8 or an encoding with 8bit based chars, we assume
526  * a termination mark of '0'.
527  *
528  * Returns a new xmlChar * containing the concatenated string. The original
529  * @cur is reallocated and should not be freed.
530  */
531 xmlChar *
xmlStrcat(xmlChar * cur,const xmlChar * add)532 xmlStrcat(xmlChar *cur, const xmlChar *add) {
533     const xmlChar *p = add;
534 
535     if (add == NULL) return(cur);
536     if (cur == NULL)
537         return(xmlStrdup(add));
538 
539     while (*p != 0) p++; /* non input consuming */
540     return(xmlStrncat(cur, add, p - add));
541 }
542 
543 /**
544  * xmlStrPrintf:
545  * @buf:   the result buffer.
546  * @len:   the result buffer length.
547  * @msg:   the message with printf formatting.
548  * @...:   extra parameters for the message.
549  *
550  * Formats @msg and places result into @buf.
551  *
552  * Returns the number of characters written to @buf or -1 if an error occurs.
553  */
554 int
xmlStrPrintf(xmlChar * buf,int len,const char * msg,...)555 xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) {
556     va_list args;
557     int ret;
558 
559     if((buf == NULL) || (msg == NULL)) {
560         return(-1);
561     }
562 
563     va_start(args, msg);
564     ret = vsnprintf((char *) buf, len, (const char *) msg, args);
565     va_end(args);
566     buf[len - 1] = 0; /* be safe ! */
567 
568     return(ret);
569 }
570 
571 /**
572  * xmlStrVPrintf:
573  * @buf:   the result buffer.
574  * @len:   the result buffer length.
575  * @msg:   the message with printf formatting.
576  * @ap:    extra parameters for the message.
577  *
578  * Formats @msg and places result into @buf.
579  *
580  * Returns the number of characters written to @buf or -1 if an error occurs.
581  */
582 int
xmlStrVPrintf(xmlChar * buf,int len,const char * msg,va_list ap)583 xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) {
584     int ret;
585 
586     if((buf == NULL) || (msg == NULL)) {
587         return(-1);
588     }
589 
590     ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
591     buf[len - 1] = 0; /* be safe ! */
592 
593     return(ret);
594 }
595 
596 /**
597  * xmlStrVASPrintf:
598  * @out:  pointer to the resulting string
599  * @maxSize:  maximum size of the output buffer
600  * @fmt:  printf format string
601  * @ap:  arguments for format string
602  *
603  * Creates a newly allocated string according to format.
604  *
605  * Returns 0 on success, 1 if the result was truncated or on other
606  * errors, -1 if a memory allocation failed.
607  */
608 int
xmlStrVASPrintf(xmlChar ** out,int maxSize,const char * msg,va_list ap)609 xmlStrVASPrintf(xmlChar **out, int maxSize, const char *msg, va_list ap) {
610     char empty[1];
611     va_list copy;
612     xmlChar *buf;
613     int res, size;
614     int truncated = 0;
615 
616     if (out == NULL)
617         return(1);
618     *out = NULL;
619     if (msg == NULL)
620         return(1);
621     if (maxSize < 32)
622         maxSize = 32;
623 
624     va_copy(copy, ap);
625     res = vsnprintf(empty, 1, msg, copy);
626     va_end(copy);
627 
628     if (res > 0) {
629         /* snprintf seems to work according to C99. */
630 
631         if (res < maxSize) {
632             size = res + 1;
633         } else {
634             size = maxSize;
635             truncated = 1;
636         }
637         buf = xmlMalloc(size);
638         if (buf == NULL)
639             return(-1);
640         if (vsnprintf((char *) buf, size, msg, ap) < 0) {
641             xmlFree(buf);
642             return(1);
643         }
644     } else {
645         /*
646          * Unfortunately, older snprintf implementations don't follow the
647          * C99 spec. If the output exceeds the size of the buffer, they can
648          * return -1, 0 or the number of characters written instead of the
649          * needed size. Older MSCVRT also won't write a terminating null
650          * byte if the buffer is too small.
651          *
652          * If the value returned is non-negative and strictly less than
653          * the buffer size (without terminating null), the result should
654          * have been written completely, so we double the buffer size
655          * until this condition is true. This assumes that snprintf will
656          * eventually return a non-negative value. Otherwise, we will
657          * allocate more and more memory until we run out.
658          *
659          * Note that this code path is also executed on conforming
660          * platforms if the output is the empty string.
661          */
662 
663         buf = NULL;
664         size = 32;
665         while (1) {
666             buf = xmlMalloc(size);
667             if (buf == NULL)
668                 return(-1);
669 
670             va_copy(copy, ap);
671             res = vsnprintf((char *) buf, size, msg, copy);
672             va_end(copy);
673             if ((res >= 0) && (res < size - 1))
674                 break;
675 
676             if (size >= maxSize) {
677                 truncated = 1;
678                 break;
679             }
680 
681             xmlFree(buf);
682 
683             if (size > maxSize / 2)
684                 size = maxSize;
685             else
686                 size *= 2;
687         }
688     }
689 
690     /*
691      * If the output was truncated, make sure that the buffer doesn't
692      * end with a truncated UTF-8 sequence.
693      */
694     if (truncated != 0) {
695         int i = size - 1;
696 
697         while (i > 0) {
698             /* Break after ASCII */
699             if (buf[i-1] < 0x80)
700                 break;
701             i -= 1;
702             /* Break before non-ASCII */
703             if (buf[i] >= 0xc0)
704                 break;
705         }
706 
707         buf[i] = 0;
708     }
709 
710     *out = (xmlChar *) buf;
711     return(truncated);
712 }
713 
714 /**
715  * xmlStrASPrintf:
716  * @out:  pointer to the resulting string
717  * @maxSize:  maximum size of the output buffer
718  * @fmt:  printf format string
719  * @ap:  arguments for format string
720  *
721  * See xmlStrVASPrintf.
722  *
723  * Returns 0 on success, 1 if the result was truncated or on other
724  * errors, -1 if a memory allocation failed.
725  */
726 int
xmlStrASPrintf(xmlChar ** out,int maxSize,const char * msg,...)727 xmlStrASPrintf(xmlChar **out, int maxSize, const char *msg, ...) {
728     va_list ap;
729     int ret;
730 
731     va_start(ap, msg);
732     ret = xmlStrVASPrintf(out, maxSize, msg, ap);
733     va_end(ap);
734 
735     return(ret);
736 }
737 
738 /************************************************************************
739  *                                                                      *
740  *              Generic UTF8 handling routines                          *
741  *                                                                      *
742  * From rfc2044: encoding of the Unicode values on UTF-8:               *
743  *                                                                      *
744  * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
745  * 0000 0000-0000 007F   0xxxxxxx                                       *
746  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *
747  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *
748  *                                                                      *
749  * I hope we won't use values > 0xFFFF anytime soon !                   *
750  *                                                                      *
751  ************************************************************************/
752 
753 
754 /**
755  * xmlUTF8Size:
756  * @utf: pointer to the UTF8 character
757  *
758  * calculates the internal size of a UTF8 character
759  *
760  * returns the numbers of bytes in the character, -1 on format error
761  */
762 int
xmlUTF8Size(const xmlChar * utf)763 xmlUTF8Size(const xmlChar *utf) {
764     xmlChar mask;
765     int len;
766 
767     if (utf == NULL)
768         return -1;
769     if (*utf < 0x80)
770         return 1;
771     /* check valid UTF8 character */
772     if (!(*utf & 0x40))
773         return -1;
774     /* determine number of bytes in char */
775     len = 2;
776     for (mask=0x20; mask != 0; mask>>=1) {
777         if (!(*utf & mask))
778             return len;
779         len++;
780     }
781     return -1;
782 }
783 
784 /**
785  * xmlUTF8Charcmp:
786  * @utf1: pointer to first UTF8 char
787  * @utf2: pointer to second UTF8 char
788  *
789  * compares the two UCS4 values
790  *
791  * returns result of the compare as with xmlStrncmp
792  */
793 int
xmlUTF8Charcmp(const xmlChar * utf1,const xmlChar * utf2)794 xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
795 
796     if (utf1 == NULL ) {
797         if (utf2 == NULL)
798             return 0;
799         return -1;
800     }
801     return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
802 }
803 
804 /**
805  * xmlUTF8Strlen:
806  * @utf:  a sequence of UTF-8 encoded bytes
807  *
808  * compute the length of an UTF8 string, it doesn't do a full UTF8
809  * checking of the content of the string.
810  *
811  * Returns the number of characters in the string or -1 in case of error
812  */
813 int
xmlUTF8Strlen(const xmlChar * utf)814 xmlUTF8Strlen(const xmlChar *utf) {
815     size_t ret = 0;
816 
817     if (utf == NULL)
818         return(-1);
819 
820     while (*utf != 0) {
821         if (utf[0] & 0x80) {
822             if ((utf[1] & 0xc0) != 0x80)
823                 return(-1);
824             if ((utf[0] & 0xe0) == 0xe0) {
825                 if ((utf[2] & 0xc0) != 0x80)
826                     return(-1);
827                 if ((utf[0] & 0xf0) == 0xf0) {
828                     if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
829                         return(-1);
830                     utf += 4;
831                 } else {
832                     utf += 3;
833                 }
834             } else {
835                 utf += 2;
836             }
837         } else {
838             utf++;
839         }
840         ret++;
841     }
842     return(ret > INT_MAX ? 0 : ret);
843 }
844 
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf. Overlong encodings,
 * surrogates (0xD800..0xDFFF), values of 0x110000 and above, and lead
 * bytes 0xF8..0xFF are rejected.
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;

    c = utf[0];
    if (c < 0x80) {
        if (*len < 1)
            goto error;
        /* 1-byte code */
        *len = 1;
    } else {
        if ((*len < 2) || ((utf[1] & 0xc0) != 0x80))
            goto error;
        if (c < 0xe0) {
            /* 0x80..0xBF are trailing bytes, 0xC0/0xC1 are overlong */
            if (c < 0xc2)
                goto error;
            /* 2-byte code */
            *len = 2;
            c = (c & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        } else {
            if ((*len < 3) || ((utf[2] & 0xc0) != 0x80))
                goto error;
            if (c < 0xf0) {
                /* 3-byte code */
                *len = 3;
                c = (c & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
                /* reject overlong forms and surrogates */
                if ((c < 0x800) || ((c >= 0xd800) && (c < 0xe000)))
                    goto error;
            } else {
                /*
                 * Only 11110xxx starts a 4-byte code. Without this
                 * test the 0x7 mask below would silently drop bit 3
                 * and decode 0xF8..0xFC as if they were 0xF0..0xF4.
                 */
                if (c >= 0xf8)
                    goto error;
                if ((*len < 4) || ((utf[3] & 0xc0) != 0x80))
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (c & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
                /* reject overlong forms and values past U+10FFFF */
                if ((c < 0x10000) || (c >= 0x110000))
                    goto error;
            }
        }
    }
    return(c);

error:
    if (len != NULL)
	*len = 0;
    return(-1);
}
914 
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Return value: true if @utf is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    if (utf == NULL)
        return(0);

    /*
     * Every character is one of (in "bit format"):
     *    0xxxxxxx                                      valid 1-byte
     *    110xxxxx 10xxxxxx                             valid 2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
     */
    while (*utf != 0) {         /* string is 0-terminated */
        unsigned char lead = *utf;
        int trailing;
        int k;

        if ((lead & 0x80) == 0x00)          /* 1-byte code, starts with 0 */
            trailing = 0;
        else if ((lead & 0xe0) == 0xc0)     /* 2-byte code, starts with 110 */
            trailing = 1;
        else if ((lead & 0xf0) == 0xe0)     /* 3-byte code, starts with 1110 */
            trailing = 2;
        else if ((lead & 0xf8) == 0xf0)     /* 4-byte code, starts with 11110 */
            trailing = 3;
        else                                /* unknown encoding */
            return(0);

        /* Each trailing byte must look like 10xxxxxx. */
        for (k = 1; k <= trailing; k++) {
            if ((utf[k] & 0xc0) != 0x80)
                return(0);
        }
        utf += trailing + 1;
    }
    return(1);
}
969 
970 /**
971  * xmlUTF8Strsize:
972  * @utf:  a sequence of UTF-8 encoded bytes
973  * @len:  the number of characters in the array
974  *
975  * storage size of an UTF8 string
976  * the behaviour is not guaranteed if the input string is not UTF-8
977  *
978  * Returns the storage size of
979  * the first 'len' characters of ARRAY
980  */
981 
982 int
xmlUTF8Strsize(const xmlChar * utf,int len)983 xmlUTF8Strsize(const xmlChar *utf, int len) {
984     const xmlChar *ptr=utf;
985     int ch;
986     size_t ret;
987 
988     if (utf == NULL)
989         return(0);
990 
991     if (len <= 0)
992         return(0);
993 
994     while ( len-- > 0) {
995         if ( !*ptr )
996             break;
997         if ( (ch = *ptr++) & 0x80)
998             while ((ch<<=1) & 0x80 ) {
999 		if (*ptr == 0) break;
1000                 ptr++;
1001 	    }
1002     }
1003     ret = ptr - utf;
1004     return (ret > INT_MAX ? 0 : ret);
1005 }
1006 
1007 
1008 /**
1009  * xmlUTF8Strndup:
1010  * @utf:  the input UTF8 *
1011  * @len:  the len of @utf (in chars)
1012  *
1013  * a strndup for array of UTF8's
1014  *
1015  * Returns a new UTF8 * or NULL
1016  */
1017 xmlChar *
xmlUTF8Strndup(const xmlChar * utf,int len)1018 xmlUTF8Strndup(const xmlChar *utf, int len) {
1019     xmlChar *ret;
1020     int i;
1021 
1022     if ((utf == NULL) || (len < 0)) return(NULL);
1023     i = xmlUTF8Strsize(utf, len);
1024     ret = (xmlChar *) xmlMallocAtomic((size_t) i + 1);
1025     if (ret == NULL) {
1026         return(NULL);
1027     }
1028     memcpy(ret, utf, i);
1029     ret[i] = 0;
1030     return(ret);
1031 }
1032 
1033 /**
1034  * xmlUTF8Strpos:
1035  * @utf:  the input UTF8 *
1036  * @pos:  the position of the desired UTF8 char (in chars)
1037  *
1038  * a function to provide the equivalent of fetching a
1039  * character from a string array
1040  *
1041  * Returns a pointer to the UTF8 character or NULL
1042  */
1043 const xmlChar *
xmlUTF8Strpos(const xmlChar * utf,int pos)1044 xmlUTF8Strpos(const xmlChar *utf, int pos) {
1045     int ch;
1046 
1047     if (utf == NULL) return(NULL);
1048     if (pos < 0)
1049         return(NULL);
1050     while (pos--) {
1051         if ((ch=*utf++) == 0) return(NULL);
1052         if ( ch & 0x80 ) {
1053             /* if not simple ascii, verify proper format */
1054             if ( (ch & 0xc0) != 0xc0 )
1055                 return(NULL);
1056             /* then skip over remaining bytes for this char */
1057             while ( (ch <<= 1) & 0x80 )
1058                 if ( (*utf++ & 0xc0) != 0x80 )
1059                     return(NULL);
1060         }
1061     }
1062     return((xmlChar *)utf);
1063 }
1064 
1065 /**
1066  * xmlUTF8Strloc:
1067  * @utf:  the input UTF8 *
1068  * @utfchar:  the UTF8 character to be found
1069  *
1070  * a function to provide the relative location of a UTF8 char
1071  *
1072  * Returns the relative character position of the desired char
1073  * or -1 if not found
1074  */
1075 int
xmlUTF8Strloc(const xmlChar * utf,const xmlChar * utfchar)1076 xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
1077     size_t i;
1078     int size;
1079     int ch;
1080 
1081     if (utf==NULL || utfchar==NULL) return -1;
1082     size = xmlUTF8Strsize(utfchar, 1);
1083         for(i=0; (ch=*utf) != 0; i++) {
1084             if (xmlStrncmp(utf, utfchar, size)==0)
1085                 return(i > INT_MAX ? 0 : i);
1086             utf++;
1087             if ( ch & 0x80 ) {
1088                 /* if not simple ascii, verify proper format */
1089                 if ( (ch & 0xc0) != 0xc0 )
1090                     return(-1);
1091                 /* then skip over remaining bytes for this char */
1092                 while ( (ch <<= 1) & 0x80 )
1093                     if ( (*utf++ & 0xc0) != 0x80 )
1094                         return(-1);
1095             }
1096         }
1097 
1098     return(-1);
1099 }
1100 /**
1101  * xmlUTF8Strsub:
1102  * @utf:  a sequence of UTF-8 encoded bytes
1103  * @start: relative pos of first char
1104  * @len:   total number to copy
1105  *
1106  * Create a substring from a given UTF-8 string
1107  * Note:  positions are given in units of UTF-8 chars
1108  *
1109  * Returns a pointer to a newly created string
1110  * or NULL if any problem
1111  */
1112 
1113 xmlChar *
xmlUTF8Strsub(const xmlChar * utf,int start,int len)1114 xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
1115     int i;
1116     int ch;
1117 
1118     if (utf == NULL) return(NULL);
1119     if (start < 0) return(NULL);
1120     if (len < 0) return(NULL);
1121 
1122     /*
1123      * Skip over any leading chars
1124      */
1125     for (i = 0;i < start;i++) {
1126         if ((ch=*utf++) == 0) return(NULL);
1127         if ( ch & 0x80 ) {
1128             /* if not simple ascii, verify proper format */
1129             if ( (ch & 0xc0) != 0xc0 )
1130                 return(NULL);
1131             /* then skip over remaining bytes for this char */
1132             while ( (ch <<= 1) & 0x80 )
1133                 if ( (*utf++ & 0xc0) != 0x80 )
1134                     return(NULL);
1135         }
1136     }
1137 
1138     return(xmlUTF8Strndup(utf, len));
1139 }
1140 
1141 /**
1142  * xmlEscapeFormatString:
1143  * @msg:  a pointer to the string in which to escape '%' characters.
1144  * Must be a heap-allocated buffer created by libxml2 that may be
1145  * returned, or that may be freed and replaced.
1146  *
1147  * Replaces the string pointed to by 'msg' with an escaped string.
1148  * Returns the same string with all '%' characters escaped.
1149  */
1150 xmlChar *
xmlEscapeFormatString(xmlChar ** msg)1151 xmlEscapeFormatString(xmlChar **msg)
1152 {
1153     xmlChar *msgPtr = NULL;
1154     xmlChar *result = NULL;
1155     xmlChar *resultPtr = NULL;
1156     size_t count = 0;
1157     size_t msgLen = 0;
1158     size_t resultLen = 0;
1159 
1160     if (!msg || !*msg)
1161         return(NULL);
1162 
1163     for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) {
1164         ++msgLen;
1165         if (*msgPtr == '%')
1166             ++count;
1167     }
1168 
1169     if (count == 0)
1170         return(*msg);
1171 
1172     if ((count > INT_MAX) || (msgLen > INT_MAX - count))
1173         return(NULL);
1174     resultLen = msgLen + count + 1;
1175     result = (xmlChar *) xmlMallocAtomic(resultLen);
1176     if (result == NULL) {
1177         /* Clear *msg to prevent format string vulnerabilities in
1178            out-of-memory situations. */
1179         xmlFree(*msg);
1180         *msg = NULL;
1181         return(NULL);
1182     }
1183 
1184     for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) {
1185         *resultPtr = *msgPtr;
1186         if (*msgPtr == '%')
1187             *(++resultPtr) = '%';
1188     }
1189     result[resultLen - 1] = '\0';
1190 
1191     xmlFree(*msg);
1192     *msg = result;
1193 
1194     return *msg;
1195 }
1196 
1197