1 /*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * [email protected]
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <[email protected]>
21 */
22
23 #define IN_LIBXML
24 #include "libxml.h"
25
26 #include <string.h>
27 #include <limits.h>
28 #include <ctype.h>
29 #include <stdlib.h>
30
31 #ifdef LIBXML_ICONV_ENABLED
32 #include <errno.h>
33 #endif
34
35 #include <libxml/encoding.h>
36 #include <libxml/xmlmemory.h>
37 #include <libxml/parser.h>
38 #ifdef LIBXML_HTML_ENABLED
39 #include <libxml/HTMLparser.h>
40 #endif
41 #include <libxml/xmlerror.h>
42
43 #include "private/buf.h"
44 #include "private/enc.h"
45 #include "private/error.h"
46
#ifdef LIBXML_ICU_ENABLED
#include <unicode/ucnv.h>

/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
#define ICU_PIVOT_BUF_SIZE 1024

/*
 * State kept for one ICU based converter. Two ICU converters are held
 * and UTF-16 is used as the pivot representation between them.
 */
typedef struct _uconv_t uconv_t;
struct _uconv_t {
    /* converts between the foreign encoding and UTF-16 */
    UConverter *uconv;
    /* converts between UTF-8 and UTF-16 */
    UConverter *utf8;
    /* intermediate UTF-16 storage shared by both converters */
    UChar pivot_buf[ICU_PIVOT_BUF_SIZE];
    /*
     * NOTE(review): presumably the read and write cursors inside
     * pivot_buf; their users are not visible in this part of the file.
     */
    UChar *pivot_source;
    UChar *pivot_target;
};
#endif
60
61 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63 struct _xmlCharEncodingAlias {
64 const char *name;
65 const char *alias;
66 };
67
68 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69 static int xmlCharEncodingAliasesNb = 0;
70 static int xmlCharEncodingAliasesMax = 0;
71
72 static int xmlLittleEndian = 1;
73
74 /************************************************************************
75 * *
76 * Conversions To/From UTF8 encoding *
77 * *
78 ************************************************************************/
79
80 /**
81 * asciiToUTF8:
82 * @out: a pointer to an array of bytes to store the result
83 * @outlen: the length of @out
84 * @in: a pointer to an array of ASCII chars
85 * @inlen: the length of @in
86 *
87 * Take a block of ASCII chars in and try to convert it to an UTF-8
88 * block of chars out.
89 *
90 * Returns the number of bytes written or an XML_ENC_ERR code.
91 *
92 * The value of @inlen after return is the number of octets consumed
93 * if the return value is positive, else unpredictable.
94 * The value of @outlen after return is the number of octets produced.
95 */
96 static int
asciiToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)97 asciiToUTF8(unsigned char* out, int *outlen,
98 const unsigned char* in, int *inlen) {
99 unsigned char* outstart = out;
100 const unsigned char* base = in;
101 const unsigned char* processed = in;
102 unsigned char* outend = out + *outlen;
103 const unsigned char* inend;
104 unsigned int c;
105
106 inend = in + (*inlen);
107 while ((in < inend) && (out - outstart + 5 < *outlen)) {
108 c= *in++;
109
110 if (out >= outend)
111 break;
112 if (c < 0x80) {
113 *out++ = c;
114 } else {
115 *outlen = out - outstart;
116 *inlen = processed - base;
117 return(XML_ENC_ERR_INPUT);
118 }
119
120 processed = (const unsigned char*) in;
121 }
122 *outlen = out - outstart;
123 *inlen = processed - base;
124 return(*outlen);
125 }
126
127 #ifdef LIBXML_OUTPUT_ENABLED
128 /**
129 * UTF8Toascii:
130 * @out: a pointer to an array of bytes to store the result
131 * @outlen: the length of @out
132 * @in: a pointer to an array of UTF-8 chars
133 * @inlen: the length of @in
134 *
135 * Take a block of UTF-8 chars in and try to convert it to an ASCII
136 * block of chars out.
137 *
138 * Returns the number of bytes written or an XML_ENC_ERR code.
139 *
140 * The value of @inlen after return is the number of octets consumed
141 * if the return value is positive, else unpredictable.
142 * The value of @outlen after return is the number of octets produced.
143 */
144 static int
UTF8Toascii(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)145 UTF8Toascii(unsigned char* out, int *outlen,
146 const unsigned char* in, int *inlen) {
147 const unsigned char* processed = in;
148 const unsigned char* outend;
149 const unsigned char* outstart = out;
150 const unsigned char* instart = in;
151 const unsigned char* inend;
152 unsigned int c, d;
153 int trailing;
154
155 if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
156 return(XML_ENC_ERR_INTERNAL);
157 if (in == NULL) {
158 /*
159 * initialization nothing to do
160 */
161 *outlen = 0;
162 *inlen = 0;
163 return(0);
164 }
165 inend = in + (*inlen);
166 outend = out + (*outlen);
167 while (in < inend) {
168 d = *in++;
169 if (d < 0x80) { c= d; trailing= 0; }
170 else if (d < 0xC0) {
171 /* trailing byte in leading position */
172 *outlen = out - outstart;
173 *inlen = processed - instart;
174 return(XML_ENC_ERR_INPUT);
175 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
176 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
177 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
178 else {
179 /* no chance for this in Ascii */
180 *outlen = out - outstart;
181 *inlen = processed - instart;
182 return(XML_ENC_ERR_INPUT);
183 }
184
185 if (inend - in < trailing) {
186 break;
187 }
188
189 for ( ; trailing; trailing--) {
190 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
191 break;
192 c <<= 6;
193 c |= d & 0x3F;
194 }
195
196 /* assertion: c is a single UTF-4 value */
197 if (c < 0x80) {
198 if (out >= outend)
199 break;
200 *out++ = c;
201 } else {
202 /* no chance for this in Ascii */
203 *outlen = out - outstart;
204 *inlen = processed - instart;
205 return(XML_ENC_ERR_INPUT);
206 }
207 processed = in;
208 }
209 *outlen = out - outstart;
210 *inlen = processed - instart;
211 return(*outlen);
212 }
213 #endif /* LIBXML_OUTPUT_ENABLED */
214
215 /**
216 * isolat1ToUTF8:
217 * @out: a pointer to an array of bytes to store the result
218 * @outlen: the length of @out
219 * @in: a pointer to an array of ISO Latin 1 chars
220 * @inlen: the length of @in
221 *
222 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
223 * block of chars out.
224 *
225 * Returns the number of bytes written or an XML_ENC_ERR code.
226 *
227 * The value of @inlen after return is the number of octets consumed
228 * if the return value is positive, else unpredictable.
229 * The value of @outlen after return is the number of octets produced.
230 */
231 int
isolat1ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)232 isolat1ToUTF8(unsigned char* out, int *outlen,
233 const unsigned char* in, int *inlen) {
234 unsigned char* outstart = out;
235 const unsigned char* base = in;
236 unsigned char* outend;
237 const unsigned char* inend;
238 const unsigned char* instop;
239
240 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
241 return(XML_ENC_ERR_INTERNAL);
242
243 outend = out + *outlen;
244 inend = in + (*inlen);
245 instop = inend;
246
247 while ((in < inend) && (out < outend - 1)) {
248 if (*in >= 0x80) {
249 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
250 *out++ = ((*in) & 0x3F) | 0x80;
251 ++in;
252 }
253 if ((instop - in) > (outend - out)) instop = in + (outend - out);
254 while ((in < instop) && (*in < 0x80)) {
255 *out++ = *in++;
256 }
257 }
258 if ((in < inend) && (out < outend) && (*in < 0x80)) {
259 *out++ = *in++;
260 }
261 *outlen = out - outstart;
262 *inlen = in - base;
263 return(*outlen);
264 }
265
266 /**
267 * UTF8ToUTF8:
268 * @out: a pointer to an array of bytes to store the result
269 * @outlen: the length of @out
270 * @inb: a pointer to an array of UTF-8 chars
271 * @inlenb: the length of @in in UTF-8 chars
272 *
273 * No op copy operation for UTF8 handling.
274 *
275 * Returns the number of bytes written or an XML_ENC_ERR code.
276 *
277 * The value of *inlen after return is the number of octets consumed
278 * if the return value is positive, else unpredictable.
279 */
280 static int
UTF8ToUTF8(unsigned char * out,int * outlen,const unsigned char * inb,int * inlenb)281 UTF8ToUTF8(unsigned char* out, int *outlen,
282 const unsigned char* inb, int *inlenb)
283 {
284 int len;
285
286 if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
287 return(XML_ENC_ERR_INTERNAL);
288 if (inb == NULL) {
289 /* inb == NULL means output is initialized. */
290 *outlen = 0;
291 *inlenb = 0;
292 return(0);
293 }
294 if (*outlen > *inlenb) {
295 len = *inlenb;
296 } else {
297 len = *outlen;
298 }
299 if (len < 0)
300 return(XML_ENC_ERR_INTERNAL);
301
302 /*
303 * FIXME: Conversion functions must assure valid UTF-8, so we have
304 * to check for UTF-8 validity. Preferably, this converter shouldn't
305 * be used at all.
306 */
307 memcpy(out, inb, len);
308
309 *outlen = len;
310 *inlenb = len;
311 return(*outlen);
312 }
313
314
315 #ifdef LIBXML_OUTPUT_ENABLED
316 /**
317 * UTF8Toisolat1:
318 * @out: a pointer to an array of bytes to store the result
319 * @outlen: the length of @out
320 * @in: a pointer to an array of UTF-8 chars
321 * @inlen: the length of @in
322 *
323 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
324 * block of chars out.
325 *
326 * Returns the number of bytes written or an XML_ENC_ERR code.
327 *
328 * The value of @inlen after return is the number of octets consumed
329 * if the return value is positive, else unpredictable.
330 * The value of @outlen after return is the number of octets produced.
331 */
332 int
UTF8Toisolat1(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)333 UTF8Toisolat1(unsigned char* out, int *outlen,
334 const unsigned char* in, int *inlen) {
335 const unsigned char* processed = in;
336 const unsigned char* outend;
337 const unsigned char* outstart = out;
338 const unsigned char* instart = in;
339 const unsigned char* inend;
340 unsigned int c, d;
341 int trailing;
342
343 if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
344 return(XML_ENC_ERR_INTERNAL);
345 if (in == NULL) {
346 /*
347 * initialization nothing to do
348 */
349 *outlen = 0;
350 *inlen = 0;
351 return(0);
352 }
353 inend = in + (*inlen);
354 outend = out + (*outlen);
355 while (in < inend) {
356 d = *in++;
357 if (d < 0x80) { c= d; trailing= 0; }
358 else if (d < 0xC0) {
359 /* trailing byte in leading position */
360 *outlen = out - outstart;
361 *inlen = processed - instart;
362 return(XML_ENC_ERR_INPUT);
363 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
364 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
365 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
366 else {
367 /* no chance for this in IsoLat1 */
368 *outlen = out - outstart;
369 *inlen = processed - instart;
370 return(XML_ENC_ERR_INPUT);
371 }
372
373 if (inend - in < trailing) {
374 break;
375 }
376
377 for ( ; trailing; trailing--) {
378 if (in >= inend)
379 break;
380 if (((d= *in++) & 0xC0) != 0x80) {
381 *outlen = out - outstart;
382 *inlen = processed - instart;
383 return(XML_ENC_ERR_INPUT);
384 }
385 c <<= 6;
386 c |= d & 0x3F;
387 }
388
389 /* assertion: c is a single UTF-4 value */
390 if (c <= 0xFF) {
391 if (out >= outend)
392 break;
393 *out++ = c;
394 } else {
395 /* no chance for this in IsoLat1 */
396 *outlen = out - outstart;
397 *inlen = processed - instart;
398 return(XML_ENC_ERR_INPUT);
399 }
400 processed = in;
401 }
402 *outlen = out - outstart;
403 *inlen = processed - instart;
404 return(*outlen);
405 }
406 #endif /* LIBXML_OUTPUT_ENABLED */
407
408 /**
409 * UTF16LEToUTF8:
410 * @out: a pointer to an array of bytes to store the result
411 * @outlen: the length of @out
412 * @inb: a pointer to an array of UTF-16LE passwd as a byte array
413 * @inlenb: the length of @in in UTF-16LE chars
414 *
415 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
416 * block of chars out. This function assumes the endian property
417 * is the same between the native type of this machine and the
418 * inputed one.
419 *
420 * Returns the number of bytes written or an XML_ENC_ERR code.
421 *
422 * The value of *inlen after return is the number of octets consumed
423 * if the return value is positive, else unpredictable.
424 */
425 static int
UTF16LEToUTF8(unsigned char * out,int * outlen,const unsigned char * inb,int * inlenb)426 UTF16LEToUTF8(unsigned char* out, int *outlen,
427 const unsigned char* inb, int *inlenb)
428 {
429 unsigned char* outstart = out;
430 const unsigned char* processed = inb;
431 unsigned char* outend;
432 unsigned short* in = (unsigned short *) (void *) inb;
433 unsigned short* inend;
434 unsigned int c, d, inlen;
435 unsigned char *tmp;
436 int bits;
437
438 if (*outlen == 0) {
439 *inlenb = 0;
440 return(0);
441 }
442 outend = out + *outlen;
443 if ((*inlenb % 2) == 1)
444 (*inlenb)--;
445 inlen = *inlenb / 2;
446 inend = in + inlen;
447 while ((in < inend) && (out - outstart + 5 < *outlen)) {
448 if (xmlLittleEndian) {
449 c= *in++;
450 } else {
451 tmp = (unsigned char *) in;
452 c = *tmp++;
453 c = c | (*tmp << 8);
454 in++;
455 }
456 if ((c & 0xFC00) == 0xD800) { /* surrogates */
457 if (in >= inend) { /* handle split mutli-byte characters */
458 break;
459 }
460 if (xmlLittleEndian) {
461 d = *in++;
462 } else {
463 tmp = (unsigned char *) in;
464 d = *tmp++;
465 d = d | (*tmp << 8);
466 in++;
467 }
468 if ((d & 0xFC00) == 0xDC00) {
469 c &= 0x03FF;
470 c <<= 10;
471 c |= d & 0x03FF;
472 c += 0x10000;
473 }
474 else {
475 *outlen = out - outstart;
476 *inlenb = processed - inb;
477 return(XML_ENC_ERR_INPUT);
478 }
479 }
480
481 /* assertion: c is a single UTF-4 value */
482 if (out >= outend)
483 break;
484 if (c < 0x80) { *out++= c; bits= -6; }
485 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
486 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
487 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
488
489 for ( ; bits >= 0; bits-= 6) {
490 if (out >= outend)
491 break;
492 *out++= ((c >> bits) & 0x3F) | 0x80;
493 }
494 processed = (const unsigned char*) in;
495 }
496 *outlen = out - outstart;
497 *inlenb = processed - inb;
498 return(*outlen);
499 }
500
501 #ifdef LIBXML_OUTPUT_ENABLED
502 /**
503 * UTF8ToUTF16LE:
504 * @outb: a pointer to an array of bytes to store the result
505 * @outlen: the length of @outb
506 * @in: a pointer to an array of UTF-8 chars
507 * @inlen: the length of @in
508 *
509 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
510 * block of chars out.
511 *
512 * Returns the number of bytes written or an XML_ENC_ERR code.
513 */
514 static int
UTF8ToUTF16LE(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)515 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
516 const unsigned char* in, int *inlen)
517 {
518 unsigned short* out = (unsigned short *) (void *) outb;
519 const unsigned char* processed = in;
520 const unsigned char *const instart = in;
521 unsigned short* outstart= out;
522 unsigned short* outend;
523 const unsigned char* inend;
524 unsigned int c, d;
525 int trailing;
526 unsigned char *tmp;
527 unsigned short tmp1, tmp2;
528
529 /* UTF16LE encoding has no BOM */
530 if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
531 return(XML_ENC_ERR_INTERNAL);
532 if (in == NULL) {
533 *outlen = 0;
534 *inlen = 0;
535 return(0);
536 }
537 inend= in + *inlen;
538 outend = out + (*outlen / 2);
539 while (in < inend) {
540 d= *in++;
541 if (d < 0x80) { c= d; trailing= 0; }
542 else if (d < 0xC0) {
543 /* trailing byte in leading position */
544 *outlen = (out - outstart) * 2;
545 *inlen = processed - instart;
546 return(XML_ENC_ERR_INPUT);
547 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
548 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
549 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
550 else {
551 /* no chance for this in UTF-16 */
552 *outlen = (out - outstart) * 2;
553 *inlen = processed - instart;
554 return(XML_ENC_ERR_INPUT);
555 }
556
557 if (inend - in < trailing) {
558 break;
559 }
560
561 for ( ; trailing; trailing--) {
562 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
563 break;
564 c <<= 6;
565 c |= d & 0x3F;
566 }
567
568 /* assertion: c is a single UTF-4 value */
569 if (c < 0x10000) {
570 if (out >= outend)
571 break;
572 if (xmlLittleEndian) {
573 *out++ = c;
574 } else {
575 tmp = (unsigned char *) out;
576 *tmp = (unsigned char) c; /* Explicit truncation */
577 *(tmp + 1) = c >> 8 ;
578 out++;
579 }
580 }
581 else if (c < 0x110000) {
582 if (out+1 >= outend)
583 break;
584 c -= 0x10000;
585 if (xmlLittleEndian) {
586 *out++ = 0xD800 | (c >> 10);
587 *out++ = 0xDC00 | (c & 0x03FF);
588 } else {
589 tmp1 = 0xD800 | (c >> 10);
590 tmp = (unsigned char *) out;
591 *tmp = (unsigned char) tmp1; /* Explicit truncation */
592 *(tmp + 1) = tmp1 >> 8;
593 out++;
594
595 tmp2 = 0xDC00 | (c & 0x03FF);
596 tmp = (unsigned char *) out;
597 *tmp = (unsigned char) tmp2; /* Explicit truncation */
598 *(tmp + 1) = tmp2 >> 8;
599 out++;
600 }
601 }
602 else
603 break;
604 processed = in;
605 }
606 *outlen = (out - outstart) * 2;
607 *inlen = processed - instart;
608 return(*outlen);
609 }
610
611 /**
612 * UTF8ToUTF16:
613 * @outb: a pointer to an array of bytes to store the result
614 * @outlen: the length of @outb
615 * @in: a pointer to an array of UTF-8 chars
616 * @inlen: the length of @in
617 *
618 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
619 * block of chars out.
620 *
621 * Returns the number of bytes written or an XML_ENC_ERR code.
622 */
623 static int
UTF8ToUTF16(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)624 UTF8ToUTF16(unsigned char* outb, int *outlen,
625 const unsigned char* in, int *inlen)
626 {
627 if (in == NULL) {
628 /*
629 * initialization, add the Byte Order Mark for UTF-16LE
630 */
631 if (*outlen >= 2) {
632 outb[0] = 0xFF;
633 outb[1] = 0xFE;
634 *outlen = 2;
635 *inlen = 0;
636 return(2);
637 }
638 *outlen = 0;
639 *inlen = 0;
640 return(0);
641 }
642 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
643 }
644 #endif /* LIBXML_OUTPUT_ENABLED */
645
646 /**
647 * UTF16BEToUTF8:
648 * @out: a pointer to an array of bytes to store the result
649 * @outlen: the length of @out
650 * @inb: a pointer to an array of UTF-16 passed as a byte array
651 * @inlenb: the length of @in in UTF-16 chars
652 *
653 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
654 * block of chars out. This function assumes the endian property
655 * is the same between the native type of this machine and the
656 * inputed one.
657 *
658 * Returns the number of bytes written or an XML_ENC_ERR code.
659 *
660 * The value of *inlen after return is the number of octets consumed
661 * if the return value is positive, else unpredictable.
662 */
663 static int
UTF16BEToUTF8(unsigned char * out,int * outlen,const unsigned char * inb,int * inlenb)664 UTF16BEToUTF8(unsigned char* out, int *outlen,
665 const unsigned char* inb, int *inlenb)
666 {
667 unsigned char* outstart = out;
668 const unsigned char* processed = inb;
669 unsigned char* outend;
670 unsigned short* in = (unsigned short *) (void *) inb;
671 unsigned short* inend;
672 unsigned int c, d, inlen;
673 unsigned char *tmp;
674 int bits;
675
676 if (*outlen == 0) {
677 *inlenb = 0;
678 return(0);
679 }
680 outend = out + *outlen;
681 if ((*inlenb % 2) == 1)
682 (*inlenb)--;
683 inlen = *inlenb / 2;
684 inend= in + inlen;
685 while ((in < inend) && (out - outstart + 5 < *outlen)) {
686 if (xmlLittleEndian) {
687 tmp = (unsigned char *) in;
688 c = *tmp++;
689 c = (c << 8) | *tmp;
690 in++;
691 } else {
692 c= *in++;
693 }
694 if ((c & 0xFC00) == 0xD800) { /* surrogates */
695 if (in >= inend) { /* handle split mutli-byte characters */
696 break;
697 }
698 if (xmlLittleEndian) {
699 tmp = (unsigned char *) in;
700 d = *tmp++;
701 d = (d << 8) | *tmp;
702 in++;
703 } else {
704 d= *in++;
705 }
706 if ((d & 0xFC00) == 0xDC00) {
707 c &= 0x03FF;
708 c <<= 10;
709 c |= d & 0x03FF;
710 c += 0x10000;
711 }
712 else {
713 *outlen = out - outstart;
714 *inlenb = processed - inb;
715 return(XML_ENC_ERR_INPUT);
716 }
717 }
718
719 /* assertion: c is a single UTF-4 value */
720 if (out >= outend)
721 break;
722 if (c < 0x80) { *out++= c; bits= -6; }
723 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
724 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
725 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
726
727 for ( ; bits >= 0; bits-= 6) {
728 if (out >= outend)
729 break;
730 *out++= ((c >> bits) & 0x3F) | 0x80;
731 }
732 processed = (const unsigned char*) in;
733 }
734 *outlen = out - outstart;
735 *inlenb = processed - inb;
736 return(*outlen);
737 }
738
739 #ifdef LIBXML_OUTPUT_ENABLED
740 /**
741 * UTF8ToUTF16BE:
742 * @outb: a pointer to an array of bytes to store the result
743 * @outlen: the length of @outb
744 * @in: a pointer to an array of UTF-8 chars
745 * @inlen: the length of @in
746 *
747 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
748 * block of chars out.
749 *
750 * Returns the number of bytes written or an XML_ENC_ERR code.
751 */
752 static int
UTF8ToUTF16BE(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)753 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
754 const unsigned char* in, int *inlen)
755 {
756 unsigned short* out = (unsigned short *) (void *) outb;
757 const unsigned char* processed = in;
758 const unsigned char *const instart = in;
759 unsigned short* outstart= out;
760 unsigned short* outend;
761 const unsigned char* inend;
762 unsigned int c, d;
763 int trailing;
764 unsigned char *tmp;
765 unsigned short tmp1, tmp2;
766
767 /* UTF-16BE has no BOM */
768 if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
769 return(XML_ENC_ERR_INTERNAL);
770 if (in == NULL) {
771 *outlen = 0;
772 *inlen = 0;
773 return(0);
774 }
775 inend= in + *inlen;
776 outend = out + (*outlen / 2);
777 while (in < inend) {
778 d= *in++;
779 if (d < 0x80) { c= d; trailing= 0; }
780 else if (d < 0xC0) {
781 /* trailing byte in leading position */
782 *outlen = out - outstart;
783 *inlen = processed - instart;
784 return(XML_ENC_ERR_INPUT);
785 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
786 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
787 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
788 else {
789 /* no chance for this in UTF-16 */
790 *outlen = out - outstart;
791 *inlen = processed - instart;
792 return(XML_ENC_ERR_INPUT);
793 }
794
795 if (inend - in < trailing) {
796 break;
797 }
798
799 for ( ; trailing; trailing--) {
800 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break;
801 c <<= 6;
802 c |= d & 0x3F;
803 }
804
805 /* assertion: c is a single UTF-4 value */
806 if (c < 0x10000) {
807 if (out >= outend) break;
808 if (xmlLittleEndian) {
809 tmp = (unsigned char *) out;
810 *tmp = c >> 8;
811 *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
812 out++;
813 } else {
814 *out++ = c;
815 }
816 }
817 else if (c < 0x110000) {
818 if (out+1 >= outend) break;
819 c -= 0x10000;
820 if (xmlLittleEndian) {
821 tmp1 = 0xD800 | (c >> 10);
822 tmp = (unsigned char *) out;
823 *tmp = tmp1 >> 8;
824 *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
825 out++;
826
827 tmp2 = 0xDC00 | (c & 0x03FF);
828 tmp = (unsigned char *) out;
829 *tmp = tmp2 >> 8;
830 *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
831 out++;
832 } else {
833 *out++ = 0xD800 | (c >> 10);
834 *out++ = 0xDC00 | (c & 0x03FF);
835 }
836 }
837 else
838 break;
839 processed = in;
840 }
841 *outlen = (out - outstart) * 2;
842 *inlen = processed - instart;
843 return(*outlen);
844 }
845 #endif /* LIBXML_OUTPUT_ENABLED */
846
847 /************************************************************************
848 * *
849 * Generic encoding handling routines *
850 * *
851 ************************************************************************/
852
853 /**
854 * xmlDetectCharEncoding:
855 * @in: a pointer to the first bytes of the XML entity, must be at least
856 * 2 bytes long (at least 4 if encoding is UTF4 variant).
857 * @len: pointer to the length of the buffer
858 *
859 * Guess the encoding of the entity using the first bytes of the entity content
860 * according to the non-normative appendix F of the XML-1.0 recommendation.
861 *
862 * Returns one of the XML_CHAR_ENCODING_... values.
863 */
864 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)865 xmlDetectCharEncoding(const unsigned char* in, int len)
866 {
867 if (in == NULL)
868 return(XML_CHAR_ENCODING_NONE);
869 if (len >= 4) {
870 if ((in[0] == 0x00) && (in[1] == 0x00) &&
871 (in[2] == 0x00) && (in[3] == 0x3C))
872 return(XML_CHAR_ENCODING_UCS4BE);
873 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
874 (in[2] == 0x00) && (in[3] == 0x00))
875 return(XML_CHAR_ENCODING_UCS4LE);
876 if ((in[0] == 0x00) && (in[1] == 0x00) &&
877 (in[2] == 0x3C) && (in[3] == 0x00))
878 return(XML_CHAR_ENCODING_UCS4_2143);
879 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
880 (in[2] == 0x00) && (in[3] == 0x00))
881 return(XML_CHAR_ENCODING_UCS4_3412);
882 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
883 (in[2] == 0xA7) && (in[3] == 0x94))
884 return(XML_CHAR_ENCODING_EBCDIC);
885 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
886 (in[2] == 0x78) && (in[3] == 0x6D))
887 return(XML_CHAR_ENCODING_UTF8);
888 /*
889 * Although not part of the recommendation, we also
890 * attempt an "auto-recognition" of UTF-16LE and
891 * UTF-16BE encodings.
892 */
893 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
894 (in[2] == 0x3F) && (in[3] == 0x00))
895 return(XML_CHAR_ENCODING_UTF16LE);
896 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
897 (in[2] == 0x00) && (in[3] == 0x3F))
898 return(XML_CHAR_ENCODING_UTF16BE);
899 }
900 if (len >= 3) {
901 /*
902 * Errata on XML-1.0 June 20 2001
903 * We now allow an UTF8 encoded BOM
904 */
905 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
906 (in[2] == 0xBF))
907 return(XML_CHAR_ENCODING_UTF8);
908 }
909 /* For UTF-16 we can recognize by the BOM */
910 if (len >= 2) {
911 if ((in[0] == 0xFE) && (in[1] == 0xFF))
912 return(XML_CHAR_ENCODING_UTF16BE);
913 if ((in[0] == 0xFF) && (in[1] == 0xFE))
914 return(XML_CHAR_ENCODING_UTF16LE);
915 }
916 return(XML_CHAR_ENCODING_NONE);
917 }
918
919 /**
920 * xmlCleanupEncodingAliases:
921 *
922 * Unregisters all aliases
923 */
924 void
xmlCleanupEncodingAliases(void)925 xmlCleanupEncodingAliases(void) {
926 int i;
927
928 if (xmlCharEncodingAliases == NULL)
929 return;
930
931 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
932 if (xmlCharEncodingAliases[i].name != NULL)
933 xmlFree((char *) xmlCharEncodingAliases[i].name);
934 if (xmlCharEncodingAliases[i].alias != NULL)
935 xmlFree((char *) xmlCharEncodingAliases[i].alias);
936 }
937 xmlCharEncodingAliasesNb = 0;
938 xmlCharEncodingAliasesMax = 0;
939 xmlFree(xmlCharEncodingAliases);
940 xmlCharEncodingAliases = NULL;
941 }
942
943 /**
944 * xmlGetEncodingAlias:
945 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
946 *
947 * Lookup an encoding name for the given alias.
948 *
949 * Returns NULL if not found, otherwise the original name
950 */
951 const char *
xmlGetEncodingAlias(const char * alias)952 xmlGetEncodingAlias(const char *alias) {
953 int i;
954 char upper[100];
955
956 if (alias == NULL)
957 return(NULL);
958
959 if (xmlCharEncodingAliases == NULL)
960 return(NULL);
961
962 for (i = 0;i < 99;i++) {
963 upper[i] = (char) toupper((unsigned char) alias[i]);
964 if (upper[i] == 0) break;
965 }
966 upper[i] = 0;
967
968 /*
969 * Walk down the list looking for a definition of the alias
970 */
971 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
972 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
973 return(xmlCharEncodingAliases[i].name);
974 }
975 }
976 return(NULL);
977 }
978
979 /**
980 * xmlAddEncodingAlias:
981 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
982 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
983 *
984 * Registers an alias @alias for an encoding named @name. Existing alias
985 * will be overwritten.
986 *
987 * Returns 0 in case of success, -1 in case of error
988 */
989 int
xmlAddEncodingAlias(const char * name,const char * alias)990 xmlAddEncodingAlias(const char *name, const char *alias) {
991 int i;
992 char upper[100];
993 char *nameCopy, *aliasCopy;
994
995 if ((name == NULL) || (alias == NULL))
996 return(-1);
997
998 for (i = 0;i < 99;i++) {
999 upper[i] = (char) toupper((unsigned char) alias[i]);
1000 if (upper[i] == 0) break;
1001 }
1002 upper[i] = 0;
1003
1004 if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1005 xmlCharEncodingAliasPtr tmp;
1006 size_t newSize = xmlCharEncodingAliasesMax ?
1007 xmlCharEncodingAliasesMax * 2 :
1008 20;
1009
1010 tmp = (xmlCharEncodingAliasPtr)
1011 xmlRealloc(xmlCharEncodingAliases,
1012 newSize * sizeof(xmlCharEncodingAlias));
1013 if (tmp == NULL)
1014 return(-1);
1015 xmlCharEncodingAliases = tmp;
1016 xmlCharEncodingAliasesMax = newSize;
1017 }
1018
1019 /*
1020 * Walk down the list looking for a definition of the alias
1021 */
1022 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1023 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1024 /*
1025 * Replace the definition.
1026 */
1027 nameCopy = xmlMemStrdup(name);
1028 if (nameCopy == NULL)
1029 return(-1);
1030 xmlFree((char *) xmlCharEncodingAliases[i].name);
1031 xmlCharEncodingAliases[i].name = nameCopy;
1032 return(0);
1033 }
1034 }
1035 /*
1036 * Add the definition
1037 */
1038 nameCopy = xmlMemStrdup(name);
1039 if (nameCopy == NULL)
1040 return(-1);
1041 aliasCopy = xmlMemStrdup(upper);
1042 if (aliasCopy == NULL) {
1043 xmlFree(nameCopy);
1044 return(-1);
1045 }
1046 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy;
1047 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy;
1048 xmlCharEncodingAliasesNb++;
1049 return(0);
1050 }
1051
1052 /**
1053 * xmlDelEncodingAlias:
1054 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1055 *
1056 * Unregisters an encoding alias @alias
1057 *
1058 * Returns 0 in case of success, -1 in case of error
1059 */
1060 int
xmlDelEncodingAlias(const char * alias)1061 xmlDelEncodingAlias(const char *alias) {
1062 int i;
1063
1064 if (alias == NULL)
1065 return(-1);
1066
1067 if (xmlCharEncodingAliases == NULL)
1068 return(-1);
1069 /*
1070 * Walk down the list looking for a definition of the alias
1071 */
1072 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1073 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1074 xmlFree((char *) xmlCharEncodingAliases[i].name);
1075 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1076 xmlCharEncodingAliasesNb--;
1077 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1078 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1079 return(0);
1080 }
1081 }
1082 return(-1);
1083 }
1084
1085 /**
1086 * xmlParseCharEncoding:
1087 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1088 *
1089 * Compare the string to the encoding schemes already known. Note
1090 * that the comparison is case insensitive accordingly to the section
1091 * [XML] 4.3.3 Character Encoding in Entities.
1092 *
1093 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1094 * if not recognized.
1095 */
1096 xmlCharEncoding
xmlParseCharEncoding(const char * name)1097 xmlParseCharEncoding(const char* name)
1098 {
1099 const char *alias;
1100 char upper[500];
1101 int i;
1102
1103 if (name == NULL)
1104 return(XML_CHAR_ENCODING_NONE);
1105
1106 /*
1107 * Do the alias resolution
1108 */
1109 alias = xmlGetEncodingAlias(name);
1110 if (alias != NULL)
1111 name = alias;
1112
1113 for (i = 0;i < 499;i++) {
1114 upper[i] = (char) toupper((unsigned char) name[i]);
1115 if (upper[i] == 0) break;
1116 }
1117 upper[i] = 0;
1118
1119 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1120 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1121 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1122
1123 /*
1124 * NOTE: if we were able to parse this, the endianness of UTF16 is
1125 * already found and in use
1126 */
1127 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1128 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1129
1130 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1131 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1132 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1133
1134 /*
1135 * NOTE: if we were able to parse this, the endianness of UCS4 is
1136 * already found and in use
1137 */
1138 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1139 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1140 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1141
1142
1143 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1144 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1145 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1146
1147 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1148 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1149 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1150
1151 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1152 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1153 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1154 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1155 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1156 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1157 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1158
1159 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1160 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1161 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1162
1163 return(XML_CHAR_ENCODING_ERROR);
1164 }
1165
1166 /**
1167 * xmlGetCharEncodingName:
1168 * @enc: the encoding
1169 *
1170 * The "canonical" name for XML encoding.
1171 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1172 * Section 4.3.3 Character Encoding in Entities
1173 *
1174 * Returns the canonical name for the given encoding
1175 */
1176
1177 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1178 xmlGetCharEncodingName(xmlCharEncoding enc) {
1179 switch (enc) {
1180 case XML_CHAR_ENCODING_ERROR:
1181 return(NULL);
1182 case XML_CHAR_ENCODING_NONE:
1183 return(NULL);
1184 case XML_CHAR_ENCODING_UTF8:
1185 return("UTF-8");
1186 case XML_CHAR_ENCODING_UTF16LE:
1187 return("UTF-16");
1188 case XML_CHAR_ENCODING_UTF16BE:
1189 return("UTF-16");
1190 case XML_CHAR_ENCODING_EBCDIC:
1191 return("EBCDIC");
1192 case XML_CHAR_ENCODING_UCS4LE:
1193 return("ISO-10646-UCS-4");
1194 case XML_CHAR_ENCODING_UCS4BE:
1195 return("ISO-10646-UCS-4");
1196 case XML_CHAR_ENCODING_UCS4_2143:
1197 return("ISO-10646-UCS-4");
1198 case XML_CHAR_ENCODING_UCS4_3412:
1199 return("ISO-10646-UCS-4");
1200 case XML_CHAR_ENCODING_UCS2:
1201 return("ISO-10646-UCS-2");
1202 case XML_CHAR_ENCODING_8859_1:
1203 return("ISO-8859-1");
1204 case XML_CHAR_ENCODING_8859_2:
1205 return("ISO-8859-2");
1206 case XML_CHAR_ENCODING_8859_3:
1207 return("ISO-8859-3");
1208 case XML_CHAR_ENCODING_8859_4:
1209 return("ISO-8859-4");
1210 case XML_CHAR_ENCODING_8859_5:
1211 return("ISO-8859-5");
1212 case XML_CHAR_ENCODING_8859_6:
1213 return("ISO-8859-6");
1214 case XML_CHAR_ENCODING_8859_7:
1215 return("ISO-8859-7");
1216 case XML_CHAR_ENCODING_8859_8:
1217 return("ISO-8859-8");
1218 case XML_CHAR_ENCODING_8859_9:
1219 return("ISO-8859-9");
1220 case XML_CHAR_ENCODING_2022_JP:
1221 return("ISO-2022-JP");
1222 case XML_CHAR_ENCODING_SHIFT_JIS:
1223 return("Shift-JIS");
1224 case XML_CHAR_ENCODING_EUC_JP:
1225 return("EUC-JP");
1226 case XML_CHAR_ENCODING_ASCII:
1227 return(NULL);
1228 }
1229 return(NULL);
1230 }
1231
1232 /************************************************************************
1233 * *
1234 * Char encoding handlers *
1235 * *
1236 ************************************************************************/
1237
/*
 * Forward declarations of the ISO-8859-x converters. This section is
 * only compiled when neither LIBXML_ICONV_ENABLED nor LIBXML_ICU_ENABLED
 * is defined and LIBXML_ISO8859X_ENABLED is defined.
 */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
    defined(LIBXML_ISO8859X_ENABLED)

/*
 * DECLARE_ISO_FUNCS(n) declares the two static converters
 * ISO8859_<n>ToUTF8 and UTF8ToISO8859_<n> for part <n> of ISO-8859.
 */
#define DECLARE_ISO_FUNCS(n) \
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
                                   const unsigned char* in, int *inlen); \
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
                                 const unsigned char* in, int *inlen);

/** DOC_DISABLE */
DECLARE_ISO_FUNCS(2)
DECLARE_ISO_FUNCS(3)
DECLARE_ISO_FUNCS(4)
DECLARE_ISO_FUNCS(5)
DECLARE_ISO_FUNCS(6)
DECLARE_ISO_FUNCS(7)
DECLARE_ISO_FUNCS(8)
DECLARE_ISO_FUNCS(9)
DECLARE_ISO_FUNCS(10)
DECLARE_ISO_FUNCS(11)
DECLARE_ISO_FUNCS(13)
DECLARE_ISO_FUNCS(14)
DECLARE_ISO_FUNCS(15)
DECLARE_ISO_FUNCS(16)
/** DOC_ENABLE */

#endif /* LIBXML_ISO8859X_ENABLED */
1265
/*
 * Trailing initializers appended by MAKE_HANDLER. With iconv enabled two
 * zero iconv_t values are added (presumably for the iconv_in/iconv_out
 * members of xmlCharEncodingHandler); otherwise nothing is added.
 */
#ifdef LIBXML_ICONV_ENABLED
#define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
#else
#define EMPTY_ICONV
#endif

/*
 * Same idea for ICU: two NULL pointers are added when ICU is enabled
 * (presumably for the uconv_in/uconv_out members).
 */
#ifdef LIBXML_ICU_ENABLED
#define EMPTY_UCONV , NULL, NULL
#else
#define EMPTY_UCONV
#endif

/*
 * Builds one brace-enclosed initializer for a static
 * xmlCharEncodingHandler: name, input function, output function,
 * followed by the optional iconv/ICU placeholders defined above.
 */
#define MAKE_HANDLER(name, in, out) \
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1280
/*
 * Table of the built-in, statically allocated encoding handlers.
 *
 * The order of the first entries is significant: xmlUTF16LEHandler,
 * xmlUTF16BEHandler, xmlLatin1Handler and xmlAsciiHandler, defined right
 * after this table, point at indices 1, 2, 4 and 5 respectively. Do not
 * reorder or insert entries before "ASCII" without updating them.
 *
 * Without LIBXML_OUTPUT_ENABLED the output function of every entry
 * except "UTF-8" is NULL and the "HTML" entry is not present.
 */
static const xmlCharEncodingHandler defaultHandlers[] = {
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
#ifdef LIBXML_OUTPUT_ENABLED
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
#ifdef LIBXML_HTML_ENABLED
    /* Output-only handler: there is no input function for "HTML". */
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
#endif
#else
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
#endif /* LIBXML_OUTPUT_ENABLED */

    /* Built-in ISO-8859-x converters, only without iconv and ICU. */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
    defined(LIBXML_ISO8859X_ENABLED)
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
#endif
};
1320
/* Number of entries in the defaultHandlers table. */
#define NUM_DEFAULT_HANDLERS \
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))

/*
 * Shortcuts to frequently used built-in handlers. The indices must match
 * the order of the entries in defaultHandlers:
 * 1 = "UTF-16LE", 2 = "UTF-16BE", 4 = "ISO-8859-1", 5 = "ASCII".
 */
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
static const xmlCharEncodingHandler *xmlLatin1Handler = &defaultHandlers[4];
static const xmlCharEncodingHandler *xmlAsciiHandler = &defaultHandlers[5];

/* the size should be growable, but it's not a big deal ... */
#define MAX_ENCODING_HANDLERS 50
/*
 * Table of handlers registered at run time. It is allocated on first
 * registration with room for MAX_ENCODING_HANDLERS pointers;
 * nbCharEncodingHandler is the number of slots currently in use.
 */
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;
1333
1334 /**
1335 * xmlNewCharEncodingHandler:
1336 * @name: the encoding name, in UTF-8 format (ASCII actually)
1337 * @input: the xmlCharEncodingInputFunc to read that encoding
1338 * @output: the xmlCharEncodingOutputFunc to write that encoding
1339 *
1340 * Create and registers an xmlCharEncodingHandler.
1341 *
1342 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1343 */
1344 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1345 xmlNewCharEncodingHandler(const char *name,
1346 xmlCharEncodingInputFunc input,
1347 xmlCharEncodingOutputFunc output) {
1348 xmlCharEncodingHandlerPtr handler;
1349 const char *alias;
1350 char upper[500];
1351 int i;
1352 char *up = NULL;
1353
1354 /*
1355 * Do the alias resolution
1356 */
1357 alias = xmlGetEncodingAlias(name);
1358 if (alias != NULL)
1359 name = alias;
1360
1361 /*
1362 * Keep only the uppercase version of the encoding.
1363 */
1364 if (name == NULL)
1365 return(NULL);
1366 for (i = 0;i < 499;i++) {
1367 upper[i] = (char) toupper((unsigned char) name[i]);
1368 if (upper[i] == 0) break;
1369 }
1370 upper[i] = 0;
1371 up = xmlMemStrdup(upper);
1372 if (up == NULL)
1373 return(NULL);
1374
1375 /*
1376 * allocate and fill-up an handler block.
1377 */
1378 handler = (xmlCharEncodingHandlerPtr)
1379 xmlMalloc(sizeof(xmlCharEncodingHandler));
1380 if (handler == NULL) {
1381 xmlFree(up);
1382 return(NULL);
1383 }
1384 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1385 handler->input = input;
1386 handler->output = output;
1387 handler->name = up;
1388
1389 #ifdef LIBXML_ICONV_ENABLED
1390 handler->iconv_in = NULL;
1391 handler->iconv_out = NULL;
1392 #endif
1393 #ifdef LIBXML_ICU_ENABLED
1394 handler->uconv_in = NULL;
1395 handler->uconv_out = NULL;
1396 #endif
1397
1398 /*
1399 * registers and returns the handler.
1400 */
1401 xmlRegisterCharEncodingHandler(handler);
1402 return(handler);
1403 }
1404
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Kept for compatibility only; it does nothing but forward to
 * xmlInitParser.
 */
void
xmlInitCharEncodingHandlers(void)
{
    xmlInitParser();
}
1414
1415 /**
1416 * xmlInitEncodingInternal:
1417 *
1418 * Initialize the char encoding support.
1419 */
1420 void
xmlInitEncodingInternal(void)1421 xmlInitEncodingInternal(void) {
1422 unsigned short int tst = 0x1234;
1423 unsigned char *ptr = (unsigned char *) &tst;
1424
1425 if (*ptr == 0x12) xmlLittleEndian = 0;
1426 else xmlLittleEndian = 1;
1427 }
1428
1429 /**
1430 * xmlCleanupCharEncodingHandlers:
1431 *
1432 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1433 * to free global state but see the warnings there. xmlCleanupParser
1434 * should be only called once at program exit. In most cases, you don't
1435 * have call cleanup functions at all.
1436 *
1437 * Cleanup the memory allocated for the char encoding support, it
1438 * unregisters all the encoding handlers and the aliases.
1439 */
1440 void
xmlCleanupCharEncodingHandlers(void)1441 xmlCleanupCharEncodingHandlers(void) {
1442 xmlCleanupEncodingAliases();
1443
1444 if (handlers == NULL) return;
1445
1446 for (;nbCharEncodingHandler > 0;) {
1447 nbCharEncodingHandler--;
1448 if (handlers[nbCharEncodingHandler] != NULL) {
1449 if (handlers[nbCharEncodingHandler]->name != NULL)
1450 xmlFree(handlers[nbCharEncodingHandler]->name);
1451 xmlFree(handlers[nbCharEncodingHandler]);
1452 }
1453 }
1454 xmlFree(handlers);
1455 handlers = NULL;
1456 nbCharEncodingHandler = 0;
1457 }
1458
1459 /**
1460 * xmlRegisterCharEncodingHandler:
1461 * @handler: the xmlCharEncodingHandlerPtr handler block
1462 *
1463 * Register the char encoding handler, surprising, isn't it ?
1464 */
1465 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1466 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1467 if (handler == NULL)
1468 return;
1469 if (handlers == NULL) {
1470 handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1471 if (handlers == NULL)
1472 goto free_handler;
1473 }
1474
1475 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS)
1476 goto free_handler;
1477 handlers[nbCharEncodingHandler++] = handler;
1478 return;
1479
1480 free_handler:
1481 if (handler != NULL) {
1482 if (handler->name != NULL) {
1483 xmlFree(handler->name);
1484 }
1485 xmlFree(handler);
1486 }
1487 }
1488
1489 #ifdef LIBXML_ICONV_ENABLED
1490 static int
xmlCreateIconvHandler(const char * name,xmlCharEncodingHandler ** out)1491 xmlCreateIconvHandler(const char *name, xmlCharEncodingHandler **out) {
1492 xmlCharEncodingHandlerPtr enc = NULL;
1493 iconv_t icv_in = (iconv_t) -1;
1494 iconv_t icv_out = (iconv_t) -1;
1495 int ret;
1496
1497 *out = NULL;
1498
1499 icv_in = iconv_open("UTF-8", name);
1500 if (icv_in == (iconv_t) -1) {
1501 if (errno == EINVAL)
1502 ret = XML_ERR_UNSUPPORTED_ENCODING;
1503 else if (errno == ENOMEM)
1504 ret = XML_ERR_NO_MEMORY;
1505 else
1506 ret = XML_ERR_SYSTEM;
1507 goto error;
1508 }
1509
1510 icv_out = iconv_open(name, "UTF-8");
1511 if (icv_out == (iconv_t) -1) {
1512 if (errno == EINVAL)
1513 ret = XML_ERR_UNSUPPORTED_ENCODING;
1514 else if (errno == ENOMEM)
1515 ret = XML_ERR_NO_MEMORY;
1516 else
1517 ret = XML_ERR_SYSTEM;
1518 goto error;
1519 }
1520
1521 enc = xmlMalloc(sizeof(*enc));
1522 if (enc == NULL) {
1523 ret = XML_ERR_NO_MEMORY;
1524 goto error;
1525 }
1526 memset(enc, 0, sizeof(*enc));
1527
1528 enc->name = xmlMemStrdup(name);
1529 if (enc->name == NULL) {
1530 ret = XML_ERR_NO_MEMORY;
1531 goto error;
1532 }
1533 enc->iconv_in = icv_in;
1534 enc->iconv_out = icv_out;
1535
1536 *out = enc;
1537 return(0);
1538
1539 error:
1540 if (enc != NULL)
1541 xmlFree(enc);
1542 if (icv_in != (iconv_t) -1)
1543 iconv_close(icv_in);
1544 if (icv_out != (iconv_t) -1)
1545 iconv_close(icv_out);
1546 return(ret);
1547 }
1548 #endif /* LIBXML_ICONV_ENABLED */
1549
1550 #ifdef LIBXML_ICU_ENABLED
1551 static int
openIcuConverter(const char * name,int toUnicode,uconv_t ** out)1552 openIcuConverter(const char* name, int toUnicode, uconv_t **out)
1553 {
1554 UErrorCode status;
1555 uconv_t *conv;
1556
1557 *out = NULL;
1558
1559 conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
1560 if (conv == NULL)
1561 return(XML_ERR_NO_MEMORY);
1562
1563 conv->pivot_source = conv->pivot_buf;
1564 conv->pivot_target = conv->pivot_buf;
1565
1566 status = U_ZERO_ERROR;
1567 conv->uconv = ucnv_open(name, &status);
1568 if (U_FAILURE(status))
1569 goto error;
1570
1571 status = U_ZERO_ERROR;
1572 if (toUnicode) {
1573 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
1574 NULL, NULL, NULL, &status);
1575 }
1576 else {
1577 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
1578 NULL, NULL, NULL, &status);
1579 }
1580 if (U_FAILURE(status))
1581 goto error;
1582
1583 status = U_ZERO_ERROR;
1584 conv->utf8 = ucnv_open("UTF-8", &status);
1585 if (U_FAILURE(status))
1586 goto error;
1587
1588 *out = conv;
1589 return(0);
1590
1591 error:
1592 if (conv->uconv)
1593 ucnv_close(conv->uconv);
1594 xmlFree(conv);
1595
1596 if (status == U_FILE_ACCESS_ERROR)
1597 return(XML_ERR_UNSUPPORTED_ENCODING);
1598 if (status == U_MEMORY_ALLOCATION_ERROR)
1599 return(XML_ERR_NO_MEMORY);
1600 return(XML_ERR_SYSTEM);
1601 }
1602
1603 static void
closeIcuConverter(uconv_t * conv)1604 closeIcuConverter(uconv_t *conv)
1605 {
1606 if (conv == NULL)
1607 return;
1608 ucnv_close(conv->uconv);
1609 ucnv_close(conv->utf8);
1610 xmlFree(conv);
1611 }
1612
1613 static int
xmlCreateUconvHandler(const char * name,xmlCharEncodingHandler ** out)1614 xmlCreateUconvHandler(const char *name, xmlCharEncodingHandler **out) {
1615 xmlCharEncodingHandlerPtr enc = NULL;
1616 uconv_t *ucv_in = NULL;
1617 uconv_t *ucv_out = NULL;
1618 int ret;
1619
1620 ret = openIcuConverter(name, 1, &ucv_in);
1621 if (ret != 0)
1622 goto error;
1623 ret = openIcuConverter(name, 0, &ucv_out);
1624 if (ret != 0)
1625 goto error;
1626
1627 enc = (xmlCharEncodingHandlerPtr)
1628 xmlMalloc(sizeof(xmlCharEncodingHandler));
1629 if (enc == NULL) {
1630 ret = XML_ERR_NO_MEMORY;
1631 goto error;
1632 }
1633 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1634
1635 enc->name = xmlMemStrdup(name);
1636 if (enc->name == NULL) {
1637 ret = XML_ERR_NO_MEMORY;
1638 goto error;
1639 }
1640 enc->input = NULL;
1641 enc->output = NULL;
1642 enc->uconv_in = ucv_in;
1643 enc->uconv_out = ucv_out;
1644
1645 *out = enc;
1646 return(0);
1647
1648 error:
1649 if (enc != NULL)
1650 xmlFree(enc);
1651 if (ucv_in != NULL)
1652 closeIcuConverter(ucv_in);
1653 if (ucv_out != NULL)
1654 closeIcuConverter(ucv_out);
1655 return(ret);
1656 }
1657 #endif /* LIBXML_ICU_ENABLED */
1658
1659 /**
1660 * xmlFindExtraHandler:
1661 * @name: a string describing the char encoding.
1662 * @output: boolean, use handler for output
1663 * @out: pointer to resulting handler
1664 *
1665 * Search the non-default handlers for an exact match.
1666 *
1667 * Returns 0 on success, 1 if no handler was found, -1 if a memory
1668 * allocation failed.
1669 */
1670 static int
xmlFindExtraHandler(const char * name,int output,xmlCharEncodingHandler ** out)1671 xmlFindExtraHandler(const char *name, int output,
1672 xmlCharEncodingHandler **out) {
1673 int ret;
1674 int i;
1675
1676 (void) ret;
1677
1678 if (handlers != NULL) {
1679 for (i = 0; i < nbCharEncodingHandler; i++) {
1680 xmlCharEncodingHandler *handler = handlers[i];
1681
1682 if (!xmlStrcasecmp((const xmlChar *) name,
1683 (const xmlChar *) handler->name)) {
1684 if (output) {
1685 if (handler->output != NULL) {
1686 *out = handler;
1687 return(0);
1688 }
1689 } else {
1690 if (handler->input != NULL) {
1691 *out = handler;
1692 return(0);
1693 }
1694 }
1695 }
1696 }
1697 }
1698
1699 #ifdef LIBXML_ICONV_ENABLED
1700 ret = xmlCreateIconvHandler(name, out);
1701 if (*out != NULL)
1702 return(0);
1703 if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1704 return(ret);
1705 #endif /* LIBXML_ICONV_ENABLED */
1706
1707 #ifdef LIBXML_ICU_ENABLED
1708 ret = xmlCreateUconvHandler(name, out);
1709 if (*out != NULL)
1710 return(0);
1711 if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1712 return(ret);
1713 #endif /* LIBXML_ICU_ENABLED */
1714
1715 return(XML_ERR_UNSUPPORTED_ENCODING);
1716 }
1717
1718 /**
1719 * xmlFindHandler:
1720 * @name: a string describing the char encoding.
1721 * @output: boolean, use handler for output
1722 * @out: pointer to resulting handler
1723 *
1724 * Search all handlers for an exact match.
1725 *
1726 * Returns 0 on success, 1 if no handler was found, -1 if a memory
1727 * allocation failed.
1728 */
1729 static int
xmlFindHandler(const char * name,int output,xmlCharEncodingHandler ** out)1730 xmlFindHandler(const char *name, int output, xmlCharEncodingHandler **out) {
1731 int i;
1732
1733 /*
1734 * Check for default handlers
1735 */
1736 for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1737 xmlCharEncodingHandler *handler;
1738
1739 handler = (xmlCharEncodingHandler *) &defaultHandlers[i];
1740
1741 if (xmlStrcasecmp((const xmlChar *) name,
1742 (const xmlChar *) handler->name) == 0) {
1743 if (output) {
1744 if (handler->output != NULL) {
1745 *out = handler;
1746 return(0);
1747 }
1748 } else {
1749 if (handler->input != NULL) {
1750 *out = handler;
1751 return(0);
1752 }
1753 }
1754 }
1755 }
1756
1757 /*
1758 * Check for other handlers
1759 */
1760 return(xmlFindExtraHandler(name, output, out));
1761 }
1762
1763 /**
1764 * xmlLookupCharEncodingHandler:
1765 * @enc: an xmlCharEncoding value.
1766 * @out: pointer to result
1767 *
1768 * Find or create a handler matching the encoding. If no default or
1769 * registered handler could be found, try to create a handler using
1770 * iconv or ICU if supported.
1771 *
1772 * The handler must be closed with xmlCharEncCloseFunc.
1773 *
1774 * Available since 2.13.0.
1775 *
1776 * Returns an xmlParserErrors error code.
1777 */
1778 int
xmlLookupCharEncodingHandler(xmlCharEncoding enc,xmlCharEncodingHandler ** out)1779 xmlLookupCharEncodingHandler(xmlCharEncoding enc,
1780 xmlCharEncodingHandler **out) {
1781 const char *name = NULL;
1782 static const char *const ebcdicNames[] = {
1783 "EBCDIC", "ebcdic", "EBCDIC-US", "IBM-037"
1784 };
1785 static const char *const ucs4Names[] = {
1786 "ISO-10646-UCS-4", "UCS-4", "UCS4"
1787 };
1788 static const char *const ucs2Names[] = {
1789 "ISO-10646-UCS-2", "UCS-2", "UCS2"
1790 };
1791 static const char *const shiftJisNames[] = {
1792 "SHIFT-JIS", "SHIFT_JIS", "Shift_JIS",
1793 };
1794 const char *const *names = NULL;
1795 int numNames = 0;
1796 int ret;
1797 int i;
1798
1799 if (out == NULL)
1800 return(XML_ERR_ARGUMENT);
1801 *out = NULL;
1802
1803 switch (enc) {
1804 case XML_CHAR_ENCODING_ERROR:
1805 return(XML_ERR_UNSUPPORTED_ENCODING);
1806 case XML_CHAR_ENCODING_NONE:
1807 return(0);
1808 case XML_CHAR_ENCODING_UTF8:
1809 return(0);
1810 case XML_CHAR_ENCODING_UTF16LE:
1811 *out = (xmlCharEncodingHandler *) xmlUTF16LEHandler;
1812 return(0);
1813 case XML_CHAR_ENCODING_UTF16BE:
1814 *out = (xmlCharEncodingHandler *) xmlUTF16BEHandler;
1815 return(0);
1816 case XML_CHAR_ENCODING_EBCDIC:
1817 names = ebcdicNames;
1818 numNames = sizeof(ebcdicNames) / sizeof(ebcdicNames[0]);
1819 break;
1820 case XML_CHAR_ENCODING_UCS4BE:
1821 case XML_CHAR_ENCODING_UCS4LE:
1822 names = ucs4Names;
1823 numNames = sizeof(ucs4Names) / sizeof(ucs4Names[0]);
1824 break;
1825 case XML_CHAR_ENCODING_UCS4_2143:
1826 break;
1827 case XML_CHAR_ENCODING_UCS4_3412:
1828 break;
1829 case XML_CHAR_ENCODING_UCS2:
1830 names = ucs2Names;
1831 numNames = sizeof(ucs2Names) / sizeof(ucs2Names[0]);
1832 break;
1833
1834 case XML_CHAR_ENCODING_ASCII:
1835 *out = (xmlCharEncodingHandler *) xmlAsciiHandler;
1836 return(0);
1837 case XML_CHAR_ENCODING_8859_1:
1838 *out = (xmlCharEncodingHandler *) xmlLatin1Handler;
1839 return(0);
1840 case XML_CHAR_ENCODING_8859_2:
1841 name = "ISO-8859-2";
1842 break;
1843 case XML_CHAR_ENCODING_8859_3:
1844 name = "ISO-8859-3";
1845 break;
1846 case XML_CHAR_ENCODING_8859_4:
1847 name = "ISO-8859-4";
1848 break;
1849 case XML_CHAR_ENCODING_8859_5:
1850 name = "ISO-8859-5";
1851 break;
1852 case XML_CHAR_ENCODING_8859_6:
1853 name = "ISO-8859-6";
1854 break;
1855 case XML_CHAR_ENCODING_8859_7:
1856 name = "ISO-8859-7";
1857 break;
1858 case XML_CHAR_ENCODING_8859_8:
1859 name = "ISO-8859-8";
1860 break;
1861 case XML_CHAR_ENCODING_8859_9:
1862 name = "ISO-8859-9";
1863 break;
1864
1865 case XML_CHAR_ENCODING_2022_JP:
1866 name = "ISO-2022-JP";
1867 break;
1868 case XML_CHAR_ENCODING_SHIFT_JIS:
1869 names = shiftJisNames;
1870 numNames = sizeof(shiftJisNames) / sizeof(shiftJisNames[0]);
1871 break;
1872 case XML_CHAR_ENCODING_EUC_JP:
1873 name = "EUC-JP";
1874 break;
1875 default:
1876 break;
1877 }
1878
1879 if (name != NULL)
1880 return(xmlFindExtraHandler(name, 0, out));
1881
1882 if (names != NULL) {
1883 for (i = 0; i < numNames; i++) {
1884 ret = xmlFindExtraHandler(names[i], 0, out);
1885 if (*out != NULL)
1886 return(0);
1887 if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1888 return(ret);
1889 }
1890 }
1891
1892 return(XML_ERR_UNSUPPORTED_ENCODING);
1893 }
1894
1895 /**
1896 * xmlGetCharEncodingHandler:
1897 * @enc: an xmlCharEncoding value.
1898 *
1899 * DEPRECATED: Use xmlLookupCharEncodingHandler which has better error
1900 * reporting.
1901 *
1902 * Returns the handler or NULL if no handler was found or an error
1903 * occurred.
1904 */
1905 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1906 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1907 xmlCharEncodingHandler *ret;
1908
1909 xmlLookupCharEncodingHandler(enc, &ret);
1910 return(ret);
1911 }
1912
1913 /**
1914 * xmlOpenCharEncodingHandler:
1915 * @name: a string describing the char encoding.
1916 * @output: boolean, use handler for output
1917 * @out: pointer to result
1918 *
1919 * Find or create a handler matching the encoding. If no default or
1920 * registered handler could be found, try to create a handler using
1921 * iconv or ICU if supported.
1922 *
1923 * The handler must be closed with xmlCharEncCloseFunc.
1924 *
1925 * Available since 2.13.0.
1926 *
1927 * Returns an xmlParserErrors error code.
1928 */
1929 int
xmlOpenCharEncodingHandler(const char * name,int output,xmlCharEncodingHandler ** out)1930 xmlOpenCharEncodingHandler(const char *name, int output,
1931 xmlCharEncodingHandler **out) {
1932 const char *nalias;
1933 const char *norig;
1934 xmlCharEncoding enc;
1935 int ret;
1936
1937 if (out == NULL)
1938 return(XML_ERR_ARGUMENT);
1939 *out = NULL;
1940
1941 if (name == NULL)
1942 return(XML_ERR_ARGUMENT);
1943
1944 /*
1945 * Do the alias resolution
1946 */
1947 norig = name;
1948 nalias = xmlGetEncodingAlias(name);
1949 if (nalias != NULL)
1950 name = nalias;
1951
1952 ret = xmlFindHandler(name, output, out);
1953 if (*out != NULL)
1954 return(0);
1955 if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1956 return(ret);
1957
1958 /*
1959 * Fallback using the canonical names
1960 */
1961 enc = xmlParseCharEncoding(norig);
1962 return(xmlLookupCharEncodingHandler(enc, out));
1963 }
1964
1965 /**
1966 * xmlFindCharEncodingHandler:
1967 * @name: a string describing the char encoding.
1968 *
1969 * DEPRECATED: Use xmlOpenCharEncodingHandler which has better error
1970 * reporting.
1971 *
1972 * Returns the handler or NULL if no handler was found or an error
1973 * occurred.
1974 */
1975 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1976 xmlFindCharEncodingHandler(const char *name) {
1977 xmlCharEncodingHandler *ret;
1978
1979 xmlOpenCharEncodingHandler(name, 0, &ret);
1980 return(ret);
1981 }
1982
1983 /************************************************************************
1984 * *
1985 * ICONV based generic conversion functions *
1986 * *
1987 ************************************************************************/
1988
1989 #ifdef LIBXML_ICONV_ENABLED
1990 /**
1991 * xmlIconvWrapper:
1992 * @cd: iconv converter data structure
1993 * @out: a pointer to an array of bytes to store the result
1994 * @outlen: the length of @out
1995 * @in: a pointer to an array of input bytes
1996 * @inlen: the length of @in
1997 *
1998 * Returns an XML_ENC_ERR code.
1999 *
2000 * The value of @inlen after return is the number of octets consumed
2001 * as the return value is positive, else unpredictable.
2002 * The value of @outlen after return is the number of octets produced.
2003 */
2004 static int
xmlIconvWrapper(iconv_t cd,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2005 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
2006 const unsigned char *in, int *inlen) {
2007 size_t icv_inlen, icv_outlen;
2008 const char *icv_in = (const char *) in;
2009 char *icv_out = (char *) out;
2010 size_t ret;
2011
2012 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
2013 if (outlen != NULL) *outlen = 0;
2014 return(XML_ENC_ERR_INTERNAL);
2015 }
2016 icv_inlen = *inlen;
2017 icv_outlen = *outlen;
2018 /*
2019 * Some versions take const, other versions take non-const input.
2020 */
2021 ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
2022 *inlen -= icv_inlen;
2023 *outlen -= icv_outlen;
2024 if (ret == (size_t) -1) {
2025 if (errno == EILSEQ)
2026 return(XML_ENC_ERR_INPUT);
2027 if (errno == E2BIG)
2028 return(XML_ENC_ERR_SPACE);
2029 if (errno == EINVAL)
2030 return(XML_ENC_ERR_PARTIAL);
2031 return(XML_ENC_ERR_INTERNAL);
2032 }
2033 return(XML_ENC_ERR_SUCCESS);
2034 }
2035 #endif /* LIBXML_ICONV_ENABLED */
2036
2037 /************************************************************************
2038 * *
2039 * ICU based generic conversion functions *
2040 * *
2041 ************************************************************************/
2042
2043 #ifdef LIBXML_ICU_ENABLED
2044 /**
2045 * xmlUconvWrapper:
2046 * @cd: ICU uconverter data structure
2047 * @toUnicode : non-zero if toUnicode. 0 otherwise.
2048 * @out: a pointer to an array of bytes to store the result
2049 * @outlen: the length of @out
2050 * @in: a pointer to an array of input bytes
2051 * @inlen: the length of @in
2052 *
2053 * Returns an XML_ENC_ERR code.
2054 *
2055 * The value of @inlen after return is the number of octets consumed
2056 * as the return value is positive, else unpredictable.
2057 * The value of @outlen after return is the number of octets produced.
2058 */
2059 static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2060 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
2061 const unsigned char *in, int *inlen) {
2062 const char *ucv_in = (const char *) in;
2063 char *ucv_out = (char *) out;
2064 UErrorCode err = U_ZERO_ERROR;
2065
2066 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
2067 if (outlen != NULL) *outlen = 0;
2068 return(XML_ENC_ERR_INTERNAL);
2069 }
2070
2071 /*
2072 * Note that the ICU API is stateful. It can always consume a certain
2073 * amount of input even if the output buffer would overflow. The
2074 * remaining input must be processed by calling ucnv_convertEx with a
2075 * possibly empty input buffer.
2076 *
2077 * ucnv_convertEx is always called with reset and flush set to 0,
2078 * so we don't mess up the state. This should never generate
2079 * U_TRUNCATED_CHAR_FOUND errors.
2080 *
2081 * This also means that ICU xmlCharEncodingHandlers should never be
2082 * reused. It would be a lot nicer if there was a way to emulate the
2083 * stateless iconv API.
2084 */
2085 if (toUnicode) {
2086 /* encoding => UTF-16 => UTF-8 */
2087 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
2088 &ucv_in, ucv_in + *inlen, cd->pivot_buf,
2089 &cd->pivot_source, &cd->pivot_target,
2090 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
2091 } else {
2092 /* UTF-8 => UTF-16 => encoding */
2093 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
2094 &ucv_in, ucv_in + *inlen, cd->pivot_buf,
2095 &cd->pivot_source, &cd->pivot_target,
2096 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
2097 }
2098 *inlen = ucv_in - (const char*) in;
2099 *outlen = ucv_out - (char *) out;
2100 if (U_SUCCESS(err)) {
2101 return(XML_ENC_ERR_SUCCESS);
2102 }
2103 if (err == U_BUFFER_OVERFLOW_ERROR)
2104 return(XML_ENC_ERR_SPACE);
2105 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
2106 return(XML_ENC_ERR_INPUT);
2107 return(XML_ENC_ERR_PARTIAL);
2108 }
2109 #endif /* LIBXML_ICU_ENABLED */
2110
2111 /************************************************************************
2112 * *
2113 * The real API used by libxml for on-the-fly conversion *
2114 * *
2115 ************************************************************************/
2116
2117 /**
2118 * xmlEncConvertError:
2119 * @code: XML_ENC_ERR code
2120 *
2121 * Convert XML_ENC_ERR to libxml2 error codes.
2122 */
2123 static int
xmlEncConvertError(int code)2124 xmlEncConvertError(int code) {
2125 int ret;
2126
2127 switch (code) {
2128 case XML_ENC_ERR_SUCCESS:
2129 ret = XML_ERR_OK;
2130 break;
2131 case XML_ENC_ERR_INPUT:
2132 ret = XML_ERR_INVALID_ENCODING;
2133 break;
2134 case XML_ENC_ERR_MEMORY:
2135 ret = XML_ERR_NO_MEMORY;
2136 break;
2137 default:
2138 ret = XML_ERR_INTERNAL_ERROR;
2139 break;
2140 }
2141
2142 return(ret);
2143 }
2144
2145 /**
2146 * xmlEncInputChunk:
2147 * @handler: encoding handler
2148 * @out: a pointer to an array of bytes to store the result
2149 * @outlen: the length of @out
2150 * @in: a pointer to an array of input bytes
2151 * @inlen: the length of @in
2152 *
2153 * The value of @inlen after return is the number of octets consumed
2154 * as the return value is 0, else unpredictable.
2155 * The value of @outlen after return is the number of octets produced.
2156 *
2157 * Returns an XML_ENC_ERR code.
2158 */
2159 int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2160 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2161 int *outlen, const unsigned char *in, int *inlen) {
2162 int ret;
2163
2164 if (handler->input != NULL) {
2165 int oldinlen = *inlen;
2166
2167 ret = handler->input(out, outlen, in, inlen);
2168 if (ret >= 0) {
2169 /*
2170 * The built-in converters don't signal XML_ENC_ERR_SPACE.
2171 */
2172 if (*inlen < oldinlen) {
2173 if (*outlen > 0)
2174 ret = XML_ENC_ERR_SPACE;
2175 else
2176 ret = XML_ENC_ERR_PARTIAL;
2177 } else {
2178 ret = XML_ENC_ERR_SUCCESS;
2179 }
2180 }
2181 }
2182 #ifdef LIBXML_ICONV_ENABLED
2183 else if (handler->iconv_in != NULL) {
2184 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2185 }
2186 #endif /* LIBXML_ICONV_ENABLED */
2187 #ifdef LIBXML_ICU_ENABLED
2188 else if (handler->uconv_in != NULL) {
2189 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
2190 }
2191 #endif /* LIBXML_ICU_ENABLED */
2192 else {
2193 *outlen = 0;
2194 *inlen = 0;
2195 ret = XML_ENC_ERR_INTERNAL;
2196 }
2197
2198 /* Ignore partial errors when reading. */
2199 if (ret == XML_ENC_ERR_PARTIAL)
2200 ret = XML_ENC_ERR_SUCCESS;
2201
2202 return(ret);
2203 }
2204
2205 /**
2206 * xmlEncOutputChunk:
2207 * @handler: encoding handler
2208 * @out: a pointer to an array of bytes to store the result
2209 * @outlen: the length of @out
2210 * @in: a pointer to an array of input bytes
2211 * @inlen: the length of @in
2212 *
2213 * Returns an XML_ENC_ERR code.
2214 *
2215 * The value of @inlen after return is the number of octets consumed
2216 * as the return value is 0, else unpredictable.
2217 * The value of @outlen after return is the number of octets produced.
2218 */
2219 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2220 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2221 int *outlen, const unsigned char *in, int *inlen) {
2222 int ret;
2223
2224 if (handler->output != NULL) {
2225 int oldinlen = *inlen;
2226
2227 ret = handler->output(out, outlen, in, inlen);
2228 if (ret >= 0) {
2229 /*
2230 * The built-in converters don't signal XML_ENC_ERR_SPACE.
2231 */
2232 if (*inlen < oldinlen) {
2233 if (*outlen > 0)
2234 ret = XML_ENC_ERR_SPACE;
2235 else
2236 ret = XML_ENC_ERR_PARTIAL;
2237 } else {
2238 ret = XML_ENC_ERR_SUCCESS;
2239 }
2240 }
2241 }
2242 #ifdef LIBXML_ICONV_ENABLED
2243 else if (handler->iconv_out != NULL) {
2244 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2245 }
2246 #endif /* LIBXML_ICONV_ENABLED */
2247 #ifdef LIBXML_ICU_ENABLED
2248 else if (handler->uconv_out != NULL) {
2249 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
2250 }
2251 #endif /* LIBXML_ICU_ENABLED */
2252 else {
2253 *outlen = 0;
2254 *inlen = 0;
2255 ret = XML_ENC_ERR_INTERNAL;
2256 }
2257
2258 /* We shouldn't generate partial sequences when writing. */
2259 if (ret == XML_ENC_ERR_PARTIAL)
2260 ret = XML_ENC_ERR_INTERNAL;
2261
2262 return(ret);
2263 }
2264
2265 /**
2266 * xmlCharEncFirstLine:
2267 * @handler: char encoding transformation data structure
2268 * @out: an xmlBuffer for the output.
2269 * @in: an xmlBuffer for the input
2270 *
2271 * DEPERECATED: Don't use.
2272 *
2273 * Returns the number of bytes written or an XML_ENC_ERR code.
2274 */
2275 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2276 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2277 xmlBufferPtr in) {
2278 return(xmlCharEncInFunc(handler, out, in));
2279 }
2280
2281 /**
2282 * xmlCharEncInput:
2283 * @input: a parser input buffer
2284 *
2285 * Generic front-end for the encoding handler on parser input
2286 *
2287 * Returns the number of bytes written or an XML_ENC_ERR code.
2288 */
2289 int
xmlCharEncInput(xmlParserInputBufferPtr input)2290 xmlCharEncInput(xmlParserInputBufferPtr input)
2291 {
2292 int ret;
2293 size_t avail;
2294 size_t toconv;
2295 int c_in;
2296 int c_out;
2297 xmlBufPtr in;
2298 xmlBufPtr out;
2299 const xmlChar *inData;
2300 size_t inTotal = 0;
2301
2302 if ((input == NULL) || (input->encoder == NULL) ||
2303 (input->buffer == NULL) || (input->raw == NULL))
2304 return(XML_ENC_ERR_INTERNAL);
2305 out = input->buffer;
2306 in = input->raw;
2307
2308 toconv = xmlBufUse(in);
2309 if (toconv == 0)
2310 return (0);
2311 inData = xmlBufContent(in);
2312 inTotal = 0;
2313
2314 do {
2315 c_in = toconv > INT_MAX / 2 ? INT_MAX / 2 : toconv;
2316
2317 avail = xmlBufAvail(out);
2318 if (avail > INT_MAX)
2319 avail = INT_MAX;
2320 if (avail < 4096) {
2321 if (xmlBufGrow(out, 4096) < 0) {
2322 input->error = XML_ERR_NO_MEMORY;
2323 return(XML_ENC_ERR_MEMORY);
2324 }
2325 avail = xmlBufAvail(out);
2326 }
2327
2328 c_in = toconv;
2329 c_out = avail;
2330 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2331 inData, &c_in);
2332 inTotal += c_in;
2333 inData += c_in;
2334 toconv -= c_in;
2335 xmlBufAddLen(out, c_out);
2336 } while (ret == XML_ENC_ERR_SPACE);
2337
2338 xmlBufShrink(in, inTotal);
2339
2340 if (input->rawconsumed > ULONG_MAX - (unsigned long)c_in)
2341 input->rawconsumed = ULONG_MAX;
2342 else
2343 input->rawconsumed += c_in;
2344
2345 if (((ret != 0) && (c_out == 0)) ||
2346 (ret == XML_ENC_ERR_MEMORY)) {
2347 if (input->error == 0)
2348 input->error = xmlEncConvertError(ret);
2349 return(ret);
2350 }
2351
2352 return (c_out);
2353 }
2354
2355 /**
2356 * xmlCharEncInFunc:
2357 * @handler: char encoding transformation data structure
2358 * @out: an xmlBuffer for the output.
2359 * @in: an xmlBuffer for the input
2360 *
2361 * Generic front-end for the encoding handler input function
2362 *
2363 * Returns the number of bytes written or an XML_ENC_ERR code.
2364 */
2365 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2366 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2367 xmlBufferPtr in)
2368 {
2369 int ret;
2370 int written;
2371 int toconv;
2372
2373 if (handler == NULL)
2374 return(XML_ENC_ERR_INTERNAL);
2375 if (out == NULL)
2376 return(XML_ENC_ERR_INTERNAL);
2377 if (in == NULL)
2378 return(XML_ENC_ERR_INTERNAL);
2379
2380 toconv = in->use;
2381 if (toconv == 0)
2382 return (0);
2383 written = out->size - out->use -1; /* count '\0' */
2384 if (toconv * 2 >= written) {
2385 xmlBufferGrow(out, out->size + toconv * 2);
2386 written = out->size - out->use - 1;
2387 }
2388 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2389 in->content, &toconv);
2390 xmlBufferShrink(in, toconv);
2391 out->use += written;
2392 out->content[out->use] = 0;
2393
2394 return (written? written : ret);
2395 }
2396
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlCharEncOutput:
 * @output: a parser output buffer
 * @init: is this an initialization call without data
 *
 * Generic front-end for the encoding handler on parser output.
 * A first call with @init == 1 has to be made first to initiate the
 * output in case of non-stateless encoding needing to initiate their
 * state or the output (like the BOM in UTF16).
 *
 * Data is taken from @output->buffer in chunks of at most 64 KiB,
 * converted with @output->encoder and appended to @output->conv; the
 * conversion is retried while the handler reports XML_ENC_ERR_SPACE.
 *
 * In case of UTF8 sequence conversion errors for the given encoder,
 * the content will be automatically remapped to a CharRef sequence.
 *
 * On failure @output->error is set unless an error was already
 * recorded.
 *
 * Returns the number of bytes written or an XML_ENC_ERR code.
 */
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
    int ret;
    size_t written;
    int writtentot = 0;
    size_t toconv;
    int c_in;
    int c_out;
    xmlBufPtr in;
    xmlBufPtr out;

    if ((output == NULL) || (output->encoder == NULL) ||
        (output->buffer == NULL) || (output->conv == NULL))
        return(XML_ENC_ERR_INTERNAL);
    out = output->conv;
    in = output->buffer;

retry:

    written = xmlBufAvail(out);

    /*
     * First specific handling of the initialization call
     */
    if (init) {
        c_in = 0;
        c_out = (written > INT_MAX) ? INT_MAX : (int) written;
        /* TODO: Check return value. */
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                          NULL, &c_in);
        xmlBufAddLen(out, c_out);
        return(c_out);
    }

    /*
     * Conversion itself.
     */
    toconv = xmlBufUse(in);
    if (toconv > 64 * 1024)
        toconv = 64 * 1024;
    if (toconv * 4 >= written) {
        if (xmlBufGrow(out, toconv * 4) < 0) {
            ret = XML_ENC_ERR_MEMORY;
            goto error;
        }
        written = xmlBufAvail(out);
    }
    if (written > 256 * 1024)
        written = 256 * 1024;

    c_in = toconv;
    c_out = written;
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                            xmlBufContent(in), &c_in);
    xmlBufShrink(in, c_in);
    xmlBufAddLen(out, c_out);
    writtentot += c_out;

    if (ret == XML_ENC_ERR_SPACE)
        goto retry;

    /*
     * Attempt to handle error cases
     */
    if (ret == XML_ENC_ERR_INPUT) {
        xmlChar charref[20];
        size_t pending = xmlBufUse(in);
        size_t room;
        /* The decoder takes an int length: clamp instead of truncating. */
        int len = (pending > INT_MAX) ? INT_MAX : (int) pending;
        xmlChar *content = xmlBufContent(in);
        int cur, charrefLen;

        cur = xmlGetUTF8Char(content, &len);
        if (cur <= 0)
            goto error;

        /*
         * Removes the UTF8 sequence, and replace it by a charref
         * and continue the transcoding phase, hoping the error
         * did not mangle the encoder state.
         */
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                         "&#%d;", cur);
        xmlBufShrink(in, len);
        /* Same failure handling as for the main conversion above. */
        if (xmlBufGrow(out, charrefLen * 4) < 0) {
            ret = XML_ENC_ERR_MEMORY;
            goto error;
        }
        room = xmlBufAvail(out);
        c_out = (room > INT_MAX) ? INT_MAX : (int) room;
        c_in = charrefLen;
        ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                                charref, &c_in);
        if ((ret < 0) || (c_in != charrefLen)) {
            ret = XML_ENC_ERR_INTERNAL;
            goto error;
        }

        xmlBufAddLen(out, c_out);
        writtentot += c_out;
        goto retry;
    }

error:
    if (((writtentot <= 0) && (ret != 0)) ||
        (ret == XML_ENC_ERR_MEMORY)) {
        if (output->error == 0)
            output->error = xmlEncConvertError(ret);
        return(ret);
    }

    return(writtentot);
}
#endif
2521
2522 /**
2523 * xmlCharEncOutFunc:
2524 * @handler: char encoding transformation data structure
2525 * @out: an xmlBuffer for the output.
2526 * @in: an xmlBuffer for the input
2527 *
2528 * Generic front-end for the encoding handler output function
2529 * a first call with @in == NULL has to be made firs to initiate the
2530 * output in case of non-stateless encoding needing to initiate their
2531 * state or the output (like the BOM in UTF16).
2532 * In case of UTF8 sequence conversion errors for the given encoder,
2533 * the content will be automatically remapped to a CharRef sequence.
2534 *
2535 * Returns the number of bytes written or an XML_ENC_ERR code.
2536 */
2537 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2538 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2539 xmlBufferPtr in) {
2540 int ret;
2541 int written;
2542 int writtentot = 0;
2543 int toconv;
2544
2545 if (handler == NULL) return(XML_ENC_ERR_INTERNAL);
2546 if (out == NULL) return(XML_ENC_ERR_INTERNAL);
2547
2548 retry:
2549
2550 written = out->size - out->use;
2551
2552 if (written > 0)
2553 written--; /* Gennady: count '/0' */
2554
2555 /*
2556 * First specific handling of in = NULL, i.e. the initialization call
2557 */
2558 if (in == NULL) {
2559 toconv = 0;
2560 /* TODO: Check return value. */
2561 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2562 NULL, &toconv);
2563 out->use += written;
2564 out->content[out->use] = 0;
2565 return(0);
2566 }
2567
2568 /*
2569 * Conversion itself.
2570 */
2571 toconv = in->use;
2572 if (toconv * 4 >= written) {
2573 xmlBufferGrow(out, toconv * 4);
2574 written = out->size - out->use - 1;
2575 }
2576 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2577 in->content, &toconv);
2578 xmlBufferShrink(in, toconv);
2579 out->use += written;
2580 writtentot += written;
2581 out->content[out->use] = 0;
2582
2583 if (ret == XML_ENC_ERR_SPACE)
2584 goto retry;
2585
2586 /*
2587 * Attempt to handle error cases
2588 */
2589 if (ret == XML_ENC_ERR_INPUT) {
2590 xmlChar charref[20];
2591 int len = in->use;
2592 const xmlChar *utf = (const xmlChar *) in->content;
2593 int cur, charrefLen;
2594
2595 cur = xmlGetUTF8Char(utf, &len);
2596 if (cur <= 0)
2597 return(ret);
2598
2599 /*
2600 * Removes the UTF8 sequence, and replace it by a charref
2601 * and continue the transcoding phase, hoping the error
2602 * did not mangle the encoder state.
2603 */
2604 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2605 "&#%d;", cur);
2606 xmlBufferShrink(in, len);
2607 xmlBufferGrow(out, charrefLen * 4);
2608 written = out->size - out->use - 1;
2609 toconv = charrefLen;
2610 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2611 charref, &toconv);
2612 if ((ret < 0) || (toconv != charrefLen))
2613 return(XML_ENC_ERR_INTERNAL);
2614
2615 out->use += written;
2616 writtentot += written;
2617 out->content[out->use] = 0;
2618 goto retry;
2619 }
2620 return(writtentot ? writtentot : ret);
2621 }
2622
2623 /**
2624 * xmlCharEncCloseFunc:
2625 * @handler: char encoding transformation data structure
2626 *
2627 * Generic front-end for encoding handler close function
2628 *
2629 * Returns 0 if success, or -1 in case of error
2630 */
2631 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2632 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2633 int ret = 0;
2634 int tofree = 0;
2635 int i = 0;
2636
2637 if (handler == NULL) return(-1);
2638
2639 for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2640 if (handler == &defaultHandlers[i])
2641 return(0);
2642 }
2643
2644 if (handlers != NULL) {
2645 for (i = 0;i < nbCharEncodingHandler; i++) {
2646 if (handler == handlers[i])
2647 return(0);
2648 }
2649 }
2650 #ifdef LIBXML_ICONV_ENABLED
2651 /*
2652 * Iconv handlers can be used only once, free the whole block.
2653 * and the associated icon resources.
2654 */
2655 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2656 tofree = 1;
2657 if (handler->iconv_out != NULL) {
2658 if (iconv_close(handler->iconv_out))
2659 ret = -1;
2660 handler->iconv_out = NULL;
2661 }
2662 if (handler->iconv_in != NULL) {
2663 if (iconv_close(handler->iconv_in))
2664 ret = -1;
2665 handler->iconv_in = NULL;
2666 }
2667 }
2668 #endif /* LIBXML_ICONV_ENABLED */
2669 #ifdef LIBXML_ICU_ENABLED
2670 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2671 tofree = 1;
2672 if (handler->uconv_out != NULL) {
2673 closeIcuConverter(handler->uconv_out);
2674 handler->uconv_out = NULL;
2675 }
2676 if (handler->uconv_in != NULL) {
2677 closeIcuConverter(handler->uconv_in);
2678 handler->uconv_in = NULL;
2679 }
2680 }
2681 #endif
2682 if (tofree) {
2683 /* free up only dynamic handlers iconv/uconv */
2684 if (handler->name != NULL)
2685 xmlFree(handler->name);
2686 handler->name = NULL;
2687 xmlFree(handler);
2688 }
2689
2690 return(ret);
2691 }
2692
2693 /**
2694 * xmlByteConsumed:
2695 * @ctxt: an XML parser context
2696 *
2697 * This function provides the current index of the parser relative
2698 * to the start of the current entity. This function is computed in
2699 * bytes from the beginning starting at zero and finishing at the
2700 * size in byte of the file if parsing a file. The function is
2701 * of constant cost if the input is UTF-8 but can be costly if run
2702 * on non-UTF-8 input.
2703 *
2704 * Returns the index in bytes from the beginning of the entity or -1
2705 * in case the index could not be computed.
2706 */
2707 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2708 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2709 xmlParserInputPtr in;
2710
2711 if (ctxt == NULL) return(-1);
2712 in = ctxt->input;
2713 if (in == NULL) return(-1);
2714 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2715 unsigned int unused = 0;
2716 xmlCharEncodingHandler * handler = in->buf->encoder;
2717 /*
2718 * Encoding conversion, compute the number of unused original
2719 * bytes from the input not consumed and subtract that from
2720 * the raw consumed value, this is not a cheap operation
2721 */
2722 if (in->end - in->cur > 0) {
2723 unsigned char convbuf[32000];
2724 const unsigned char *cur = (const unsigned char *)in->cur;
2725 int toconv = in->end - in->cur, written = 32000;
2726
2727 int ret;
2728
2729 do {
2730 toconv = in->end - cur;
2731 written = 32000;
2732 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2733 cur, &toconv);
2734 if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE))
2735 return(-1);
2736 unused += written;
2737 cur += toconv;
2738 } while (ret == XML_ENC_ERR_SPACE);
2739 }
2740 if (in->buf->rawconsumed < unused)
2741 return(-1);
2742 return(in->buf->rawconsumed - unused);
2743 }
2744 return(in->consumed + (in->cur - in->base));
2745 }
2746
2747 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2748 #ifdef LIBXML_ISO8859X_ENABLED
2749
2750 /**
2751 * UTF8ToISO8859x:
2752 * @out: a pointer to an array of bytes to store the result
2753 * @outlen: the length of @out
2754 * @in: a pointer to an array of UTF-8 chars
2755 * @inlen: the length of @in
2756 * @xlattable: the 2-level transcoding table
2757 *
2758 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2759 * block of chars out.
2760 *
2761 * Returns the number of bytes written or an XML_ENC_ERR code.
2762 *
2763 * The value of @inlen after return is the number of octets consumed
2764 * as the return value is positive, else unpredictable.
2765 * The value of @outlen after return is the number of octets consumed.
2766 */
2767 static int
UTF8ToISO8859x(unsigned char * out,int * outlen,const unsigned char * in,int * inlen,const unsigned char * const xlattable)2768 UTF8ToISO8859x(unsigned char* out, int *outlen,
2769 const unsigned char* in, int *inlen,
2770 const unsigned char* const xlattable) {
2771 const unsigned char* outstart = out;
2772 const unsigned char* inend;
2773 const unsigned char* instart = in;
2774 const unsigned char* processed = in;
2775
2776 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2777 (xlattable == NULL))
2778 return(XML_ENC_ERR_INTERNAL);
2779 if (in == NULL) {
2780 /*
2781 * initialization nothing to do
2782 */
2783 *outlen = 0;
2784 *inlen = 0;
2785 return(0);
2786 }
2787 inend = in + (*inlen);
2788 while (in < inend) {
2789 unsigned char d = *in++;
2790 if (d < 0x80) {
2791 *out++ = d;
2792 } else if (d < 0xC0) {
2793 /* trailing byte in leading position */
2794 *outlen = out - outstart;
2795 *inlen = processed - instart;
2796 return(XML_ENC_ERR_INPUT);
2797 } else if (d < 0xE0) {
2798 unsigned char c;
2799 if (!(in < inend)) {
2800 /* trailing byte not in input buffer */
2801 *outlen = out - outstart;
2802 *inlen = processed - instart;
2803 return(XML_ENC_ERR_PARTIAL);
2804 }
2805 c = *in++;
2806 if ((c & 0xC0) != 0x80) {
2807 /* not a trailing byte */
2808 *outlen = out - outstart;
2809 *inlen = processed - instart;
2810 return(XML_ENC_ERR_INPUT);
2811 }
2812 c = c & 0x3F;
2813 d = d & 0x1F;
2814 d = xlattable [48 + c + xlattable [d] * 64];
2815 if (d == 0) {
2816 /* not in character set */
2817 *outlen = out - outstart;
2818 *inlen = processed - instart;
2819 return(XML_ENC_ERR_INPUT);
2820 }
2821 *out++ = d;
2822 } else if (d < 0xF0) {
2823 unsigned char c1;
2824 unsigned char c2;
2825 if (!(in < inend - 1)) {
2826 /* trailing bytes not in input buffer */
2827 *outlen = out - outstart;
2828 *inlen = processed - instart;
2829 return(XML_ENC_ERR_PARTIAL);
2830 }
2831 c1 = *in++;
2832 if ((c1 & 0xC0) != 0x80) {
2833 /* not a trailing byte (c1) */
2834 *outlen = out - outstart;
2835 *inlen = processed - instart;
2836 return(XML_ENC_ERR_INPUT);
2837 }
2838 c2 = *in++;
2839 if ((c2 & 0xC0) != 0x80) {
2840 /* not a trailing byte (c2) */
2841 *outlen = out - outstart;
2842 *inlen = processed - instart;
2843 return(XML_ENC_ERR_INPUT);
2844 }
2845 c1 = c1 & 0x3F;
2846 c2 = c2 & 0x3F;
2847 d = d & 0x0F;
2848 d = xlattable [48 + c2 + xlattable [48 + c1 +
2849 xlattable [32 + d] * 64] * 64];
2850 if (d == 0) {
2851 /* not in character set */
2852 *outlen = out - outstart;
2853 *inlen = processed - instart;
2854 return(XML_ENC_ERR_INPUT);
2855 }
2856 *out++ = d;
2857 } else {
2858 /* cannot transcode >= U+010000 */
2859 *outlen = out - outstart;
2860 *inlen = processed - instart;
2861 return(XML_ENC_ERR_INPUT);
2862 }
2863 processed = in;
2864 }
2865 *outlen = out - outstart;
2866 *inlen = processed - instart;
2867 return(*outlen);
2868 }
2869
2870 /**
2871 * ISO8859xToUTF8
2872 * @out: a pointer to an array of bytes to store the result
2873 * @outlen: the length of @out
2874 * @in: a pointer to an array of ISO Latin 1 chars
2875 * @inlen: the length of @in
2876 *
2877 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2878 * block of chars out.
2879 *
2880 * Returns the number of bytes written or an XML_ENC_ERR code.
2881 *
2882 * The value of @inlen after return is the number of octets consumed
2883 * The value of @outlen after return is the number of octets produced.
2884 */
2885 static int
ISO8859xToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen,unsigned short const * unicodetable)2886 ISO8859xToUTF8(unsigned char* out, int *outlen,
2887 const unsigned char* in, int *inlen,
2888 unsigned short const *unicodetable) {
2889 unsigned char* outstart = out;
2890 unsigned char* outend;
2891 const unsigned char* instart = in;
2892 const unsigned char* inend;
2893 const unsigned char* instop;
2894 unsigned int c;
2895
2896 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2897 (in == NULL) || (unicodetable == NULL))
2898 return(XML_ENC_ERR_INTERNAL);
2899 outend = out + *outlen;
2900 inend = in + *inlen;
2901 instop = inend;
2902
2903 while ((in < inend) && (out < outend - 2)) {
2904 if (*in >= 0x80) {
2905 c = unicodetable [*in - 0x80];
2906 if (c == 0) {
2907 /* undefined code point */
2908 *outlen = out - outstart;
2909 *inlen = in - instart;
2910 return(XML_ENC_ERR_INPUT);
2911 }
2912 if (c < 0x800) {
2913 *out++ = ((c >> 6) & 0x1F) | 0xC0;
2914 *out++ = (c & 0x3F) | 0x80;
2915 } else {
2916 *out++ = ((c >> 12) & 0x0F) | 0xE0;
2917 *out++ = ((c >> 6) & 0x3F) | 0x80;
2918 *out++ = (c & 0x3F) | 0x80;
2919 }
2920 ++in;
2921 }
2922 if (instop - in > outend - out) instop = in + (outend - out);
2923 while ((*in < 0x80) && (in < instop)) {
2924 *out++ = *in++;
2925 }
2926 }
2927 if ((in < inend) && (out < outend) && (*in < 0x80)) {
2928 *out++ = *in++;
2929 }
2930 if ((in < inend) && (out < outend) && (*in < 0x80)) {
2931 *out++ = *in++;
2932 }
2933 *outlen = out - outstart;
2934 *inlen = in - instart;
2935 return (*outlen);
2936 }
2937
2938
2939 /************************************************************************
2940 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2941 ************************************************************************/
2942
/*
 * Code points for the bytes 0x80..0xFF, one entry per byte, in the
 * layout ISO8859xToUTF8() reads through its unicodetable argument:
 * entry [byte - 0x80] holds the code point and 0 stands for an
 * undefined byte. Going by the name this is the ISO-8859-2 table;
 * the place where it is handed to the converter is not in this part
 * of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2961
/*
 * Reverse table in the layout UTF8ToISO8859x() reads through its
 * xlattable argument (presumably for ISO-8859-2, going by the name):
 *  - bytes 0..31: block number selected by the low 5 bits of the lead
 *    byte of a 2 byte UTF-8 sequence
 *  - bytes 32..47: block number selected by the low 4 bits of the lead
 *    byte of a 3 byte UTF-8 sequence
 *  - from byte 48 on: blocks of 64 bytes indexed by the low 6 bits of
 *    a trailing byte. For 3 byte sequences the first block looked up
 *    yields another block number; the last lookup yields the output
 *    byte, 0 meaning "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2991
/*
 * Code points for the bytes 0x80..0xFF, one entry per byte, in the
 * layout ISO8859xToUTF8() reads through its unicodetable argument:
 * entry [byte - 0x80] holds the code point and 0 stands for an
 * undefined byte (this table has several). Going by the name this is
 * the ISO-8859-3 table; the place where it is handed to the converter
 * is not in this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3010
/*
 * Reverse table in the layout UTF8ToISO8859x() reads through its
 * xlattable argument (presumably for ISO-8859-3, going by the name):
 *  - bytes 0..31: block number selected by the low 5 bits of the lead
 *    byte of a 2 byte UTF-8 sequence
 *  - bytes 32..47: block number selected by the low 4 bits of the lead
 *    byte of a 3 byte UTF-8 sequence
 *  - from byte 48 on: blocks of 64 bytes indexed by the low 6 bits of
 *    a trailing byte. For 3 byte sequences the first block looked up
 *    yields another block number; the last lookup yields the output
 *    byte, 0 meaning "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3044
/*
 * Code points for the bytes 0x80..0xFF, one entry per byte, in the
 * layout ISO8859xToUTF8() reads through its unicodetable argument:
 * entry [byte - 0x80] holds the code point and 0 stands for an
 * undefined byte. Going by the name this is the ISO-8859-4 table;
 * the place where it is handed to the converter is not in this part
 * of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3063
/*
 * Reverse table in the layout UTF8ToISO8859x() reads through its
 * xlattable argument (presumably for ISO-8859-4, going by the name):
 *  - bytes 0..31: block number selected by the low 5 bits of the lead
 *    byte of a 2 byte UTF-8 sequence
 *  - bytes 32..47: block number selected by the low 4 bits of the lead
 *    byte of a 3 byte UTF-8 sequence
 *  - from byte 48 on: blocks of 64 bytes indexed by the low 6 bits of
 *    a trailing byte. For 3 byte sequences the first block looked up
 *    yields another block number; the last lookup yields the output
 *    byte, 0 meaning "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3093
/*
 * ISO-8859-5 to Unicode map for the upper half of the byte range:
 * 128 entries, slot i holding the code point for byte 0x80 + i (the
 * first 32 slots are the identity run 0x0080..0x009f). Most of the
 * remaining slots land in the U+04xx range; every slot is populated.
 */
3094 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3095 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3096 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3097 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3098 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3099 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3100 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3101 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3102 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3103 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3104 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3105 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3106 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3107 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3108 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3109 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3110 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3111 };
3112
/*
 * Reverse map (Unicode -> ISO-8859-5 byte), stored as a compact trie
 * keyed on UTF-8 bytes. Layout as implied by the declared size and the
 * data; the lookup routine is outside this chunk -- confirm there:
 *   [0..31]   block number for each 2-byte UTF-8 lead (its low 5 bits)
 *   [32..47]  block number for each 3-byte UTF-8 lead (its low 4 bits)
 *   [48..]    6 blocks of 64 bytes, indexed by the low 6 bits of a
 *             continuation byte. The final block entry is the output
 *             byte; for 3-byte input the first block entry names the
 *             second block. 0x00 stands for "no mapping".
 * The literal is 27 rows of 16 bytes and fills the array exactly, so no
 * terminating NUL is stored.
 */
3113 static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3114 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3115 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3116 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3117 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3122 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3123 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3124 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3126 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3127 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3128 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3129 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3130 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3139 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141 };
3142
/*
 * ISO-8859-6 to Unicode map for the upper half of the byte range:
 * 128 entries, slot i holding the code point for byte 0x80 + i (the
 * first 32 slots are the identity run 0x0080..0x009f).
 * Many slots hold 0x0000; presumably these mark bytes with no assigned
 * character -- confirm against the decoder that reads this table.
 */
3143 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3144 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3145 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3146 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3147 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3148 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3149 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3150 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3151 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3152 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3153 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3154 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3155 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3156 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3157 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3158 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3159 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3160 };
3161
/*
 * Reverse map (Unicode -> ISO-8859-6 byte), stored as a compact trie
 * keyed on UTF-8 bytes. Layout as implied by the declared size and the
 * data; the lookup routine is outside this chunk -- confirm there:
 *   [0..31]   block number for each 2-byte UTF-8 lead (its low 5 bits)
 *   [32..47]  block number for each 3-byte UTF-8 lead (its low 4 bits)
 *   [48..]    5 blocks of 64 bytes, indexed by the low 6 bits of a
 *             continuation byte. The final block entry is the output
 *             byte; for 3-byte input the first block entry names the
 *             second block. 0x00 stands for "no mapping".
 * The literal is 23 rows of 16 bytes and fills the array exactly, so no
 * terminating NUL is stored.
 */
3162 static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3163 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3171 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3172 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3173 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3175 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3176 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3177 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3179 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3180 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3181 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3182 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3183 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186 };
3187
/*
 * ISO-8859-7 to Unicode map for the upper half of the byte range:
 * 128 entries, slot i holding the code point for byte 0x80 + i (the
 * first 32 slots are the identity run 0x0080..0x009f).
 * A few slots hold 0x0000; presumably these mark bytes with no assigned
 * character -- confirm against the decoder that reads this table.
 */
3188 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3189 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3190 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3191 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3192 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3193 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3194 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3195 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3196 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3197 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3198 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3199 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3200 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3201 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3202 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3203 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3204 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3205 };
3206
/*
 * Reverse map (Unicode -> ISO-8859-7 byte), stored as a compact trie
 * keyed on UTF-8 bytes. Layout as implied by the declared size and the
 * data; the lookup routine is outside this chunk -- confirm there:
 *   [0..31]   block number for each 2-byte UTF-8 lead (its low 5 bits)
 *   [32..47]  block number for each 3-byte UTF-8 lead (its low 4 bits)
 *   [48..]    7 blocks of 64 bytes, indexed by the low 6 bits of a
 *             continuation byte. The final block entry is the output
 *             byte; for 3-byte input the first block entry names the
 *             second block. 0x00 stands for "no mapping".
 * The literal is 31 rows of 16 bytes and fills the array exactly, so no
 * terminating NUL is stored.
 */
3207 static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3208 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3209 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3216 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3217 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3218 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3219 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3220 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3222 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3223 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3232 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3233 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3234 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3235 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3236 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239 };
3240
/*
 * ISO-8859-8 to Unicode map for the upper half of the byte range:
 * 128 entries, slot i holding the code point for byte 0x80 + i (the
 * first 32 slots are the identity run 0x0080..0x009f).
 * Many slots hold 0x0000; presumably these mark bytes with no assigned
 * character -- confirm against the decoder that reads this table.
 */
3241 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3242 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3243 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3244 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3245 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3246 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3247 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3248 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3249 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3250 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3251 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3253 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3254 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3255 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3256 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3257 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3258 };
3259
/*
 * Reverse map (Unicode -> ISO-8859-8 byte), stored as a compact trie
 * keyed on UTF-8 bytes. Layout as implied by the declared size and the
 * data; the lookup routine is outside this chunk -- confirm there:
 *   [0..31]   block number for each 2-byte UTF-8 lead (its low 5 bits)
 *   [32..47]  block number for each 3-byte UTF-8 lead (its low 4 bits)
 *   [48..]    7 blocks of 64 bytes, indexed by the low 6 bits of a
 *             continuation byte. The final block entry is the output
 *             byte; for 3-byte input the first block entry names the
 *             second block. 0x00 stands for "no mapping".
 * The literal is 31 rows of 16 bytes and fills the array exactly, so no
 * terminating NUL is stored.
 */
3260 static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3261 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3263 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3269 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3270 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3271 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3272 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3275 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3276 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3278 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3280 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3281 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3282 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3283 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3285 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3286 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3290 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3291 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292 };
3293
/*
 * ISO-8859-9 to Unicode map for the upper half of the byte range:
 * 128 entries, slot i holding the code point for byte 0x80 + i.
 * All but six slots are the identity mapping (value == byte); the
 * exceptions are 0xD0, 0xDD, 0xDE, 0xF0, 0xFD and 0xFE, which map into
 * U+011E..U+015F. Every slot is populated.
 */
3294 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3295 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3296 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3297 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3298 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3299 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3300 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3301 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3302 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3303 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3304 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3305 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3306 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3307 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3308 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3309 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3310 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3311 };
3312
/*
 * Reverse map (Unicode -> ISO-8859-9 byte), stored as a compact trie
 * keyed on UTF-8 bytes. Layout as implied by the declared size and the
 * data; the lookup routine is outside this chunk -- confirm there:
 *   [0..31]   block number for each 2-byte UTF-8 lead (its low 5 bits)
 *   [32..47]  block number for each 3-byte UTF-8 lead (its low 4 bits);
 *             all zero in this table
 *   [48..]    5 blocks of 64 bytes, indexed by the low 6 bits of a
 *             continuation byte; the block entry is the output byte,
 *             with 0x00 standing for "no mapping".
 * The literal is 23 rows of 16 bytes and fills the array exactly, so no
 * terminating NUL is stored.
 */
3313 static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3314 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3322 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3323 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3324 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3325 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3326 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3327 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3328 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3329 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3335 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337 };
3338
/*
 * ISO-8859-10 to Unicode map for the upper half of the byte range:
 * 128 entries, slot i holding the code point for byte 0x80 + i (the
 * first 32 slots are the identity run 0x0080..0x009f). Every slot in
 * this table is populated.
 */
3339 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3340 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3341 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3342 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3343 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3344 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3345 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3346 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3347 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3348 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3349 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3350 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3351 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3352 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3353 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3354 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3355 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3356 };
3357
/*
 * Reverse map (Unicode -> ISO-8859-10 byte), stored as a compact trie
 * keyed on UTF-8 bytes. Layout as implied by the declared size and the
 * data; the lookup routine is outside this chunk -- confirm there:
 *   [0..31]   block number for each 2-byte UTF-8 lead (its low 5 bits)
 *   [32..47]  block number for each 3-byte UTF-8 lead (its low 4 bits)
 *   [48..]    7 blocks of 64 bytes, indexed by the low 6 bits of a
 *             continuation byte. The final block entry is the output
 *             byte; for 3-byte input the first block entry names the
 *             second block. 0x00 stands for "no mapping".
 * The literal is 31 rows of 16 bytes and fills the array exactly, so no
 * terminating NUL is stored.
 */
3358 static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3359 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3367 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3368 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3369 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3370 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3371 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3372 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3373 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3374 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3375 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3377 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3378 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3387 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3388 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3389 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3390 };
3391
/*
 * ISO-8859-11 to Unicode map for the upper half of the byte range:
 * 128 entries, slot i holding the code point for byte 0x80 + i (the
 * first 32 slots are the identity run 0x0080..0x009f). Bytes from 0xA1
 * upward land in the U+0Exx range.
 * Eight slots hold 0x0000; presumably these mark bytes with no assigned
 * character -- confirm against the decoder that reads this table.
 */
3392 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3393 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3394 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3395 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3396 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3397 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3398 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3399 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3400 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3401 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3402 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3403 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3404 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3405 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3406 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3407 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3408 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3409 };
3410
/*
 * Reverse map (Unicode -> ISO-8859-11 byte), stored as a compact trie
 * keyed on UTF-8 bytes. Layout as implied by the declared size and the
 * data; the lookup routine is outside this chunk -- confirm there:
 *   [0..31]   block number for each 2-byte UTF-8 lead (its low 5 bits)
 *   [32..47]  block number for each 3-byte UTF-8 lead (its low 4 bits)
 *   [48..]    6 blocks of 64 bytes, indexed by the low 6 bits of a
 *             continuation byte. The final block entry is the output
 *             byte; for 3-byte input the first block entry names the
 *             second block. 0x00 stands for "no mapping".
 * The literal is 27 rows of 16 bytes and fills the array exactly, so no
 * terminating NUL is stored.
 */
3411 static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3412 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3420 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3421 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3427 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3428 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3429 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3430 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3431 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3436 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3437 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439 };
3440
/*
 * ISO-8859-13 to Unicode map for the upper half of the byte range:
 * 128 entries, slot i holding the code point for byte 0x80 + i (the
 * first 32 slots are the identity run 0x0080..0x009f). Every slot in
 * this table is populated.
 */
3441 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3442 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3443 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3444 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3445 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3446 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3447 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3448 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3449 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3450 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3451 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3452 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3453 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3454 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3455 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3456 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3457 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3458 };
3459
/*
 * Reverse map (Unicode -> ISO-8859-13 byte), stored as a compact trie
 * keyed on UTF-8 bytes. Layout as implied by the declared size and the
 * data; the lookup routine is outside this chunk -- confirm there:
 *   [0..31]   block number for each 2-byte UTF-8 lead (its low 5 bits)
 *   [32..47]  block number for each 3-byte UTF-8 lead (its low 4 bits)
 *   [48..]    7 blocks of 64 bytes, indexed by the low 6 bits of a
 *             continuation byte. The final block entry is the output
 *             byte; for 3-byte input the first block entry names the
 *             second block. 0x00 stands for "no mapping".
 * The literal is 31 rows of 16 bytes and fills the array exactly, so no
 * terminating NUL is stored.
 */
3460 static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3461 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3467 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3468 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3469 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3470 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3471 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3472 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3481 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3482 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3483 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3484 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3485 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3486 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3487 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3488 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3489 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3490 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3491 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3492 };
3493
/*
 * ISO-8859-14 to Unicode map for the upper half of the byte range:
 * 128 entries, slot i holding the code point for byte 0x80 + i (the
 * first 32 slots are the identity run 0x0080..0x009f). Several bytes
 * map into the U+1Exx range, which needs three bytes in UTF-8. Every
 * slot in this table is populated.
 */
3494 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3495 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3496 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3497 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3498 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3499 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3500 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3501 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3502 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3503 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3504 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3505 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3506 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3507 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3508 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3509 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3510 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3511 };
3512
/*
 * Reverse map (Unicode -> ISO-8859-14 byte), stored as a compact trie
 * keyed on UTF-8 bytes. Layout as implied by the declared size and the
 * data; the lookup routine is outside this chunk -- confirm there:
 *   [0..31]   block number for each 2-byte UTF-8 lead (its low 5 bits)
 *   [32..47]  block number for each 3-byte UTF-8 lead (its low 4 bits)
 *   [48..]    10 blocks of 64 bytes, indexed by the low 6 bits of a
 *             continuation byte. The final block entry is the output
 *             byte; for 3-byte input the first block entry names the
 *             second block. 0x00 stands for "no mapping".
 * The literal is 43 rows of 16 bytes and fills the array exactly, so no
 * terminating NUL is stored.
 */
3513 static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3514 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3522 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3523 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3524 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3526 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3529 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3531 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3534 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3543 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3545 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3549 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3551 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3552 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3553 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3554 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3555 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3556 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3557 };
3558
/*
 * xmlunicodetable_ISO8859_15:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8() by
 * ISO8859_15ToUTF8().
 *
 * Entry i holds 0x0080 + i for every index except 0x24, 0x26, 0x28, 0x34,
 * 0x38, 0x3c, 0x3d and 0x3e, which hold 0x20ac, 0x0160, 0x0161, 0x017d,
 * 0x017e, 0x0152, 0x0153 and 0x0178 respectively.
 *
 * NOTE(review): presumably entry i is the Unicode code point of the
 * ISO-8859-15 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3577
/*
 * xmltranscodetable_ISO8859_15:
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_15().
 *
 * The initializer is 27 adjacent string literals of 16 bytes each, i.e.
 * 432 bytes, which is exactly the declared size 48 + 6 * 64; the array is
 * therefore completely filled and no terminating NUL is stored.
 *
 * NOTE(review): the size expression suggests 48 leading index bytes
 * followed by 6 blocks of 64 bytes; the actual lookup scheme is defined by
 * UTF8ToISO8859x() -- confirm there before editing any byte by hand.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3607
/*
 * xmlunicodetable_ISO8859_16:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8() by
 * ISO8859_16ToUTF8().
 *
 * The first 33 entries are 0x0080 .. 0x00a0 in ascending order; from index
 * 0x21 onwards the table mixes values equal to 0x0080 + i with values in
 * the 0x01xx, 0x02xx and 0x20xx ranges.
 *
 * NOTE(review): presumably entry i is the Unicode code point of the
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3626
/*
 * xmltranscodetable_ISO8859_16:
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_16().
 *
 * The initializer is 39 adjacent string literals of 16 bytes each, i.e.
 * 624 bytes, which is exactly the declared size 48 + 9 * 64; the array is
 * therefore completely filled and no terminating NUL is stored.
 *
 * NOTE(review): the size expression suggests 48 leading index bytes
 * followed by 9 blocks of 64 bytes; the actual lookup scheme is defined by
 * UTF8ToISO8859x() -- confirm there before editing any byte by hand.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3668
3669
3670 /*
3671 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3672 */
3673
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3674 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3675 const unsigned char* in, int *inlen) {
3676 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3677 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3678 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3679 const unsigned char* in, int *inlen) {
3680 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3681 }
3682
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3683 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3684 const unsigned char* in, int *inlen) {
3685 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3686 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3687 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3688 const unsigned char* in, int *inlen) {
3689 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3690 }
3691
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3692 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3693 const unsigned char* in, int *inlen) {
3694 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3695 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3696 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3697 const unsigned char* in, int *inlen) {
3698 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3699 }
3700
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3701 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3702 const unsigned char* in, int *inlen) {
3703 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3704 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3705 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3706 const unsigned char* in, int *inlen) {
3707 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3708 }
3709
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3710 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3711 const unsigned char* in, int *inlen) {
3712 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3713 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3714 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3715 const unsigned char* in, int *inlen) {
3716 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3717 }
3718
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3719 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3720 const unsigned char* in, int *inlen) {
3721 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3722 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3723 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3724 const unsigned char* in, int *inlen) {
3725 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3726 }
3727
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3728 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3729 const unsigned char* in, int *inlen) {
3730 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3731 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3732 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3733 const unsigned char* in, int *inlen) {
3734 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3735 }
3736
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3737 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3738 const unsigned char* in, int *inlen) {
3739 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3740 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3741 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3742 const unsigned char* in, int *inlen) {
3743 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3744 }
3745
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3746 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3747 const unsigned char* in, int *inlen) {
3748 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3749 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3750 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3751 const unsigned char* in, int *inlen) {
3752 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3753 }
3754
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3755 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3756 const unsigned char* in, int *inlen) {
3757 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3758 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3759 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3760 const unsigned char* in, int *inlen) {
3761 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3762 }
3763
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3764 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3765 const unsigned char* in, int *inlen) {
3766 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3767 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3768 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3769 const unsigned char* in, int *inlen) {
3770 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3771 }
3772
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3773 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3774 const unsigned char* in, int *inlen) {
3775 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3776 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3777 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3778 const unsigned char* in, int *inlen) {
3779 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3780 }
3781
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3782 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3783 const unsigned char* in, int *inlen) {
3784 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3785 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3786 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3787 const unsigned char* in, int *inlen) {
3788 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3789 }
3790
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3791 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3792 const unsigned char* in, int *inlen) {
3793 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3794 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3795 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3796 const unsigned char* in, int *inlen) {
3797 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3798 }
3799
3800 #endif
3801 #endif
3802
3803