1 /* 2 * Summary: interface for the encoding conversion functions 3 * Description: interface for the encoding conversion functions needed for 4 * XML basic encoding and iconv() support. 5 * 6 * Related specs are 7 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 8 * [ISO-10646] UTF-8 and UTF-16 in Annexes 9 * [ISO-8859-1] ISO Latin-1 characters codes. 10 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 11 * Worldwide Character Encoding -- Version 1.0", Addison- 12 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 13 * described in Unicode Technical Report #4. 14 * [US-ASCII] Coded Character Set--7-bit American Standard Code for 15 * Information Interchange, ANSI X3.4-1986. 16 * 17 * Copy: See Copyright for the status of this software. 18 * 19 * Author: Daniel Veillard 20 */ 21 22 #ifndef __XML_CHAR_ENCODING_H__ 23 #define __XML_CHAR_ENCODING_H__ 24 25 #include <libxml/xmlversion.h> 26 27 #ifdef __cplusplus 28 extern "C" { 29 #endif 30 31 typedef enum { 32 XML_ENC_ERR_SUCCESS = 0, 33 XML_ENC_ERR_INTERNAL = -1, 34 XML_ENC_ERR_INPUT = -2, 35 XML_ENC_ERR_SPACE = -3, 36 XML_ENC_ERR_MEMORY = -4 37 } xmlCharEncError; 38 39 /* 40 * xmlCharEncoding: 41 * 42 * Predefined values for some standard encodings. 43 */ 44 typedef enum { 45 XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ 46 XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ 47 XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ 48 XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ 49 XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ 50 XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ 51 XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ 52 XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ 53 XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ 54 XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ 55 XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ 56 XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ 57 XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ 58 XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ 59 XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ 60 XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ 61 XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ 62 XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ 63 XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ 64 XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ 65 XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ 66 XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ 67 XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ 68 XML_CHAR_ENCODING_ASCII= 22,/* pure ASCII */ 69 /* Available since 2.14.0 */ 70 XML_CHAR_ENCODING_UTF16= 23,/* UTF-16 native */ 71 XML_CHAR_ENCODING_HTML= 24,/* HTML (output only) */ 72 XML_CHAR_ENCODING_8859_10= 25,/* ISO-8859-10 */ 73 XML_CHAR_ENCODING_8859_11= 26,/* ISO-8859-11 */ 74 XML_CHAR_ENCODING_8859_13= 27,/* ISO-8859-13 */ 75 XML_CHAR_ENCODING_8859_14= 28,/* ISO-8859-14 */ 76 XML_CHAR_ENCODING_8859_15= 29,/* ISO-8859-15 */ 77 XML_CHAR_ENCODING_8859_16= 30 /* ISO-8859-16 */ 78 } xmlCharEncoding; 79 80 /** 81 * xmlCharEncodingInputFunc: 82 * @out: a pointer to an array of bytes to store the UTF-8 result 83 * @outlen: the length of @out 84 * @in: a pointer to an array of chars in the original encoding 85 * @inlen: the length of @in 86 * 87 * Convert characters to UTF-8. 88 * 89 * On success, the value of @inlen after return is the number of 90 * bytes consumed and @outlen is the number of bytes produced. 91 * 92 * Returns the number of bytes written or an XML_ENC_ERR code. 93 */ 94 typedef int (*xmlCharEncodingInputFunc)(unsigned char *out, int *outlen, 95 const unsigned char *in, int *inlen); 96 97 98 /** 99 * xmlCharEncodingOutputFunc: 100 * @out: a pointer to an array of bytes to store the result 101 * @outlen: the length of @out 102 * @in: a pointer to an array of UTF-8 chars 103 * @inlen: the length of @in 104 * 105 * Convert characters from UTF-8. 106 * 107 * On success, the value of @inlen after return is the number of 108 * bytes consumed and @outlen is the number of bytes produced. 109 * 110 * Returns the number of bytes written or an XML_ENC_ERR code. 111 */ 112 typedef int (*xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, 113 const unsigned char *in, int *inlen); 114 115 116 /** 117 * xmlCharEncConvFunc: 118 * @vctxt: conversion context 119 * @out: a pointer to an array of bytes to store the result 120 * @outlen: the length of @out 121 * @in: a pointer to an array of input bytes 122 * @inlen: the length of @in 123 * 124 * Convert between character encodings. 125 * 126 * On success, the value of @inlen after return is the number of 127 * bytes consumed and @outlen is the number of bytes produced. 128 * 129 * Returns the number of bytes written or an XML_ENC_ERR code. 130 */ 131 typedef int 132 (*xmlCharEncConvFunc)(unsigned char *out, int *outlen, 133 const unsigned char *in, int *inlen, void *vctxt); 134 135 /** 136 * xmlCharEncConvCtxtDtor: 137 * @vctxt: conversion context 138 * 139 * Free a conversion context. 140 */ 141 typedef void 142 (*xmlCharEncConvCtxtDtor)(void *vctxt); 143 144 typedef struct { 145 xmlCharEncConvFunc input; 146 xmlCharEncConvFunc output; 147 xmlCharEncConvCtxtDtor ctxtDtor; 148 void *inputCtxt; 149 void *outputCtxt; 150 } xmlCharEncConverter; 151 152 /** 153 * xmlCharEncConvImpl: 154 * vctxt: user data 155 * name: encoding name 156 * conv: pointer to xmlCharEncConverter struct 157 * 158 * If this function returns XML_ERR_OK, it must fill the @conv struct 159 * with a conversion function, and optional destructor and optional 160 * input and output conversion contexts. 161 * 162 * Returns an xmlParserErrors code. 163 */ 164 typedef int 165 (*xmlCharEncConvImpl)(void *vctxt, const char *name, 166 xmlCharEncConverter *conv); 167 168 /* 169 * Block defining the handlers for non UTF-8 encodings. 170 * 171 * This structure will be made private. 172 */ 173 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; 174 typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; 175 struct _xmlCharEncodingHandler { 176 char *name XML_DEPRECATED_MEMBER; 177 xmlCharEncodingInputFunc input XML_DEPRECATED_MEMBER; 178 xmlCharEncodingOutputFunc output XML_DEPRECATED_MEMBER; 179 #ifdef LIBXML_ICONV_ENABLED 180 void *iconv_in XML_DEPRECATED_MEMBER; 181 void *iconv_out XML_DEPRECATED_MEMBER; 182 #endif /* LIBXML_ICONV_ENABLED */ 183 void *inputCtxt XML_DEPRECATED_MEMBER; 184 void *outputCtxt XML_DEPRECATED_MEMBER; 185 xmlCharEncConvCtxtDtor ctxtDtor XML_DEPRECATED_MEMBER; 186 int flags XML_DEPRECATED_MEMBER; 187 }; 188 189 /* 190 * Interfaces for encoding handlers. 191 */ 192 XML_DEPRECATED 193 XMLPUBFUN void 194 xmlInitCharEncodingHandlers (void); 195 XML_DEPRECATED 196 XMLPUBFUN void 197 xmlCleanupCharEncodingHandlers (void); 198 XMLPUBFUN void 199 xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); 200 XMLPUBFUN int 201 xmlLookupCharEncodingHandler (xmlCharEncoding enc, 202 xmlCharEncodingHandlerPtr *out); 203 XMLPUBFUN int 204 xmlOpenCharEncodingHandler (const char *name, 205 int output, 206 xmlCharEncodingHandlerPtr *out); 207 XMLPUBFUN int 208 xmlCreateCharEncodingHandler (const char *name, 209 int output, 210 xmlCharEncConvImpl impl, 211 void *implCtxt, 212 xmlCharEncodingHandlerPtr *out); 213 XMLPUBFUN xmlCharEncodingHandlerPtr 214 xmlGetCharEncodingHandler (xmlCharEncoding enc); 215 XMLPUBFUN xmlCharEncodingHandlerPtr 216 xmlFindCharEncodingHandler (const char *name); 217 XMLPUBFUN xmlCharEncodingHandlerPtr 218 xmlNewCharEncodingHandler (const char *name, 219 xmlCharEncodingInputFunc input, 220 xmlCharEncodingOutputFunc output); 221 222 /* 223 * Interfaces for encoding names and aliases. 224 */ 225 XMLPUBFUN int 226 xmlAddEncodingAlias (const char *name, 227 const char *alias); 228 XMLPUBFUN int 229 xmlDelEncodingAlias (const char *alias); 230 XMLPUBFUN const char * 231 xmlGetEncodingAlias (const char *alias); 232 XMLPUBFUN void 233 xmlCleanupEncodingAliases (void); 234 XMLPUBFUN xmlCharEncoding 235 xmlParseCharEncoding (const char *name); 236 XMLPUBFUN const char * 237 xmlGetCharEncodingName (xmlCharEncoding enc); 238 239 /* 240 * Interfaces directly used by the parsers. 241 */ 242 XMLPUBFUN xmlCharEncoding 243 xmlDetectCharEncoding (const unsigned char *in, 244 int len); 245 246 /** DOC_DISABLE */ 247 struct _xmlBuffer; 248 /** DOC_ENABLE */ 249 XMLPUBFUN int 250 xmlCharEncOutFunc (xmlCharEncodingHandler *handler, 251 struct _xmlBuffer *out, 252 struct _xmlBuffer *in); 253 254 XMLPUBFUN int 255 xmlCharEncInFunc (xmlCharEncodingHandler *handler, 256 struct _xmlBuffer *out, 257 struct _xmlBuffer *in); 258 XML_DEPRECATED 259 XMLPUBFUN int 260 xmlCharEncFirstLine (xmlCharEncodingHandler *handler, 261 struct _xmlBuffer *out, 262 struct _xmlBuffer *in); 263 XMLPUBFUN int 264 xmlCharEncCloseFunc (xmlCharEncodingHandler *handler); 265 266 /* 267 * Export a few useful functions 268 */ 269 #ifdef LIBXML_OUTPUT_ENABLED 270 XMLPUBFUN int 271 UTF8Toisolat1 (unsigned char *out, 272 int *outlen, 273 const unsigned char *in, 274 int *inlen); 275 #endif /* LIBXML_OUTPUT_ENABLED */ 276 XMLPUBFUN int 277 isolat1ToUTF8 (unsigned char *out, 278 int *outlen, 279 const unsigned char *in, 280 int *inlen); 281 #ifdef __cplusplus 282 } 283 #endif 284 285 #endif /* __XML_CHAR_ENCODING_H__ */ 286