// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 1997-2011,2014-2015 International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   Date        Name        Description
*   06/21/00    aliu        Creation.
*******************************************************************************
*/

#ifndef UTRANS_H
#define UTRANS_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/urep.h"
#include "unicode/parseerr.h"
#include "unicode/uenum.h"

#if U_SHOW_CPLUSPLUS_API
#include "unicode/localpointer.h"
#endif   // U_SHOW_CPLUSPLUS_API

/********************************************************************
 * General Notes
 ********************************************************************
 */
/**
 * @addtogroup icu4c ICU4C
 * @{
 * \file
 * \brief C API: Transliterator
 *
 * <h2> Transliteration </h2>
 * The data structures and functions described in this header provide
 * transliteration services.  Transliteration services are implemented
 * as C++ classes.  The comments and documentation in this header
 * assume the reader is familiar with the C++ headers translit.h and
 * associated documentation.
 *
 * A significant but incomplete subset of the C++ transliteration
 * services is available to C code through this header.  In order to
 * access more complex transliteration services, refer to the C++
 * headers and documentation.
 *
 * There are two sets of functions for working with transliterator IDs:
 *
 * An old, deprecated set uses char * IDs, which works for true and pure
 * identifiers that these APIs were designed for,
 * for example "Cyrillic-Latin".
 * It does not work when the ID contains filters ("[:Script=Cyrl:]")
 * or even a complete set of rules because then the ID string contains more
 * than just "invariant" characters (see utypes.h).
 *
 * A new set of functions replaces the old ones and uses UChar * IDs,
 * paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.)
 */

/********************************************************************
 * Data Structures
 ********************************************************************/

/**
 * An opaque transliterator for use in C.  Open with utrans_openXxx()
 * and close with utrans_close() when done.  Equivalent to the C++ class
 * Transliterator and its subclasses.
 * @see Transliterator
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
typedef void* UTransliterator;

/**
 * Direction constant indicating the direction in a transliterator,
 * e.g., the forward or reverse rules of a RuleBasedTransliterator.
 * Specified when a transliterator is opened.  An "A-B" transliterator
 * transliterates A to B when operating in the forward direction, and
 * B to A when operating in the reverse direction.
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
typedef enum UTransDirection {

    /**
     * UTRANS_FORWARD means from <source> to <target> for a
     * transliterator with ID <source>-<target>.  For a transliterator
     * opened using a rule, it means forward direction rules, e.g.,
     * "A > B".
     */
    UTRANS_FORWARD,

    /**
     * UTRANS_REVERSE means from <target> to <source> for a
     * transliterator with ID <source>-<target>.  For a transliterator
     * opened using a rule, it means reverse direction rules, e.g.,
     * "A < B".
     */
    UTRANS_REVERSE

} UTransDirection;

/**
 * Position structure for utrans_transIncremental() incremental
 * transliteration.  This structure defines two substrings of the text
 * being transliterated.  The first region, [contextStart,
 * contextLimit), defines what characters the transliterator will read
 * as context.  The second region, [start, limit), defines what
 * characters will actually be transliterated.  The second region
 * should be a subset of the first.
 *
 * <p>After a transliteration operation, some of the indices in this
 * structure will be modified.  See the field descriptions for
 * details.
 *
 * <p>contextStart <= start <= limit <= contextLimit
 *
 * <p>Note: All index values in this structure must be at code point
 * boundaries.  That is, none of them may occur between two code units
 * of a surrogate pair.  If any index does split a surrogate pair,
 * results are unspecified.
 *
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
typedef struct UTransPosition {

    /**
     * Beginning index, inclusive, of the context to be considered for
     * a transliteration operation.  The transliterator will ignore
     * anything before this index.  INPUT/OUTPUT parameter: This parameter
     * is updated by a transliteration operation to reflect the maximum
     * amount of antecontext needed by a transliterator.
     * \xrefitem stable "Stable" "Stable List" ICU 2.4
     */
    int32_t contextStart;

    /**
     * Ending index, exclusive, of the context to be considered for a
     * transliteration operation.  The transliterator will ignore
     * anything at or after this index.  INPUT/OUTPUT parameter: This
     * parameter is updated to reflect changes in the length of the
     * text, but points to the same logical position in the text.
     * \xrefitem stable "Stable" "Stable List" ICU 2.4
     */
    int32_t contextLimit;

    /**
     * Beginning index, inclusive, of the text to be transliterated.
     * INPUT/OUTPUT parameter: This parameter is advanced past
     * characters that have already been transliterated by a
     * transliteration operation.
     * \xrefitem stable "Stable" "Stable List" ICU 2.4
     */
    int32_t start;

    /**
     * Ending index, exclusive, of the text to be transliterated.
     * INPUT/OUTPUT parameter: This parameter is updated to reflect
     * changes in the length of the text, but points to the same
     * logical position in the text.
     * \xrefitem stable "Stable" "Stable List" ICU 2.4
     */
    int32_t limit;

} UTransPosition;

/********************************************************************
 * General API
 ********************************************************************/

/**
 * Open a custom transliterator, given a custom rules string
 * OR
 * a system transliterator, given its ID.
 * Any non-NULL result from this function should later be closed with
 * utrans_close().
 *
 * @param id a valid transliterator ID
 * @param idLength the length of the ID string, or -1 if NUL-terminated
 * @param dir the desired direction
 * @param rules the transliterator rules.  See the C++ header rbt.h for
 *              rules syntax.  If NULL then a system transliterator matching
 *              the ID is returned.
 * @param rulesLength the length of the rules, or -1 if the rules
 *                    are NUL-terminated.
 * @param parseError a pointer to a UParseError struct to receive the details
 *                   of any parsing errors.  This parameter may be NULL if no
 *                   parsing error details are desired.
 * @param pErrorCode a pointer to the UErrorCode
 * @return a transliterator pointer that may be passed to other
 *         utrans_xxx() functions, or NULL if the open call fails.
 * \xrefitem stable "Stable" "Stable List" ICU 2.8
 */
U_CAPI UTransliterator* U_EXPORT2
utrans_openU(const UChar *id,
             int32_t idLength,
             UTransDirection dir,
             const UChar *rules,
             int32_t rulesLength,
             UParseError *parseError,
             UErrorCode *pErrorCode) __INTRODUCED_IN(__ANDROID_API_T__);



/**
 * Open an inverse of an existing transliterator.  For this to work,
 * the inverse must be registered with the system.  For example, if
 * the Transliterator "A-B" is opened, and then its inverse is opened,
 * the result is the Transliterator "B-A", if such a transliterator is
 * registered with the system.  Otherwise the result is NULL and a
 * failing UErrorCode is set.  Any non-NULL result from this function
 * should later be closed with utrans_close().
 *
 * @param trans the transliterator to open the inverse of.
 * @param status a pointer to the UErrorCode
 * @return a pointer to a newly-opened transliterator that is the
 *         inverse of trans, or NULL if the open call fails.
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
U_CAPI UTransliterator* U_EXPORT2
utrans_openInverse(const UTransliterator* trans,
                   UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);



/**
 * Create a copy of a transliterator.  Any non-NULL result from this
 * function should later be closed with utrans_close().
 *
 * @param trans the transliterator to be copied.
 * @param status a pointer to the UErrorCode
 * @return a transliterator pointer that may be passed to other
 *         utrans_xxx() functions, or NULL if the clone call fails.
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
U_CAPI UTransliterator* U_EXPORT2
utrans_clone(const UTransliterator* trans,
             UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);



/**
 * Close a transliterator.  Any non-NULL pointer returned by
 * utrans_openXxx() or utrans_clone() should eventually be closed.
 * @param trans the transliterator to be closed.
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
U_CAPI void U_EXPORT2
utrans_close(UTransliterator* trans) __INTRODUCED_IN(__ANDROID_API_T__);



#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUTransliteratorPointer
 * "Smart pointer" class, closes a UTransliterator via utrans_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * \xrefitem stable "Stable" "Stable List" ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUTransliteratorPointer, UTransliterator, utrans_close);

U_NAMESPACE_END

#endif   // U_SHOW_CPLUSPLUS_API



/**
 * Set the filter used by a transliterator.  A filter can be used to
 * make the transliterator pass certain characters through untouched.
 * The filter is expressed using a UnicodeSet pattern.  If the
 * filterPattern is NULL or the empty string, then the transliterator
 * will be reset to use no filter.
 *
 * @param trans the transliterator
 * @param filterPattern a pattern string, in the form accepted by
 * UnicodeSet, specifying which characters to apply the
 * transliteration to.  May be NULL or the empty string to indicate no
 * filter.
 * @param filterPatternLen the length of filterPattern, or -1 if
 * filterPattern is zero-terminated
 * @param status a pointer to the UErrorCode
 * @see UnicodeSet
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
U_CAPI void U_EXPORT2
utrans_setFilter(UTransliterator* trans,
                 const UChar* filterPattern,
                 int32_t filterPatternLen,
                 UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);



/**
 * Return a UEnumeration for the available transliterators.
 *
 * @param pErrorCode Pointer to the UErrorCode in/out parameter.
 * @return UEnumeration for the available transliterators.
 *         Close with uenum_close().
 *
 * \xrefitem stable "Stable" "Stable List" ICU 2.8
 */
U_CAPI UEnumeration * U_EXPORT2
utrans_openIDs(UErrorCode *pErrorCode) __INTRODUCED_IN(__ANDROID_API_T__);



/********************************************************************
 * Transliteration API
 ********************************************************************/

/**
 * Transliterate a segment of a UReplaceable string.  The string is
 * passed in as a UReplaceable pointer rep and a UReplaceableCallbacks
 * function pointer struct repFunc.  Functions in the repFunc struct
 * will be called in order to modify the rep string.
 *
 * This function returns void; the only outputs are the modified rep
 * string, *limit and *status.
 *
 * @param trans the transliterator
 * @param rep a pointer to the string.  This will be passed to the
 * repFunc functions.
 * @param repFunc a set of function pointers that will be used to
 * modify the string pointed to by rep.
 * @param start the beginning index, inclusive; <code>0 <= start <=
 * *limit</code>.
 * @param limit pointer to the ending index, exclusive; <code>start <=
 * *limit <= repFunc->length(rep)</code>.  Upon return, *limit will
 * contain the new limit index.  The text previously occupying
 * <code>[start, *limit)</code> has been transliterated, possibly to a
 * string of a different length, at <code>[start,
 * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
 * is the value stored in *limit on return.
 * @param status a pointer to the UErrorCode
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
U_CAPI void U_EXPORT2
utrans_trans(const UTransliterator* trans,
             UReplaceable* rep,
             const UReplaceableCallbacks* repFunc,
             int32_t start,
             int32_t* limit,
             UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);



/**
 * Transliterate the portion of the UReplaceable text buffer that can
 * be transliterated unambiguously.  This method is typically called
 * after new text has been inserted, e.g. as a result of a keyboard
 * event.  The transliterator will try to transliterate characters of
 * <code>rep</code> between <code>pos->start</code> and
 * <code>pos->limit</code>.  Characters before
 * <code>pos->start</code> will not be changed.
 *
 * <p>Upon return, values in <code>pos</code> will be updated as
 * described for the fields of UTransPosition.
 * <code>pos->contextStart</code> will be advanced to the first
 * character that future calls to this method will read.
 * <code>pos->start</code> and <code>pos->limit</code> will
 * be adjusted to delimit the range of text that future calls to
 * this method may change, and <code>pos->contextLimit</code> will be
 * adjusted for any change in the length of the text.
 *
 * <p>Typical usage of this method begins with an initial call
 * with <code>pos->start</code> and <code>pos->limit</code>
 * set to indicate the portion of the text in <code>rep</code> to be
 * transliterated, and with <code>pos->contextStart</code> and
 * <code>pos->contextLimit</code> delimiting the context that may be
 * read (<code>contextStart <= start <= limit <= contextLimit</code>;
 * on the initial call the context typically begins where the text to
 * be transliterated begins, i.e.
 * <code>pos->contextStart == pos->start</code>).
 * Thereafter, <code>pos</code> can be used without
 * modification in future calls, provided that all changes to
 * the text in <code>rep</code> are made via this method.
 *
 * <p>This method assumes that future calls may be made that will
 * insert new text into the buffer.  As a result, it only performs
 * unambiguous transliterations.  After the last call to this method,
 * there may be untransliterated text that is waiting for more input
 * to resolve an ambiguity.  In order to perform these pending
 * transliterations, clients should call utrans_trans() with a start
 * of pos->start and a limit of pos->limit after the last call to this
 * method has been made.
 *
 * @param trans the transliterator
 * @param rep a pointer to the string.  This will be passed to the
 * repFunc functions.
 * @param repFunc a set of function pointers that will be used to
 * modify the string pointed to by rep.
 * @param pos a struct containing the start and limit indices of the
 * text to be read and the text to be transliterated
 * @param status a pointer to the UErrorCode
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
U_CAPI void U_EXPORT2
utrans_transIncremental(const UTransliterator* trans,
                        UReplaceable* rep,
                        const UReplaceableCallbacks* repFunc,
                        UTransPosition* pos,
                        UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);



/**
 * Transliterate a segment of a UChar* string.  The string is passed
 * in via a UChar* buffer.  The string is modified in place.  If the
 * result is longer than textCapacity, it is truncated.  The actual
 * length of the result is returned in *textLength, if textLength is
 * non-NULL.  *textLength may be greater than textCapacity, but only
 * textCapacity UChars will be written to *text, including the zero
 * terminator.
 *
 * This function returns void; the only outputs are the modified text
 * buffer, *textLength (if non-NULL), *limit and *status.
 *
 * @param trans the transliterator
 * @param text a pointer to a buffer containing the text to be
 * transliterated on input and the result text on output.
 * @param textLength a pointer to the length of the string in text.
 * If the length is -1 then the string is assumed to be
 * zero-terminated.  Upon return, the new length is stored in
 * *textLength.  If textLength is NULL then the string is assumed to
 * be zero-terminated.
 * @param textCapacity the capacity of the text buffer, in UChars
 * @param start the beginning index, inclusive; <code>0 <= start <=
 * *limit</code>.
 * @param limit pointer to the ending index, exclusive; <code>start <=
 * *limit <= </code>the length of the string in text.  Upon return,
 * *limit will contain the new limit index.  The text previously occupying
 * <code>[start, *limit)</code> has been transliterated, possibly to a
 * string of a different length, at <code>[start,
 * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
 * is the value stored in *limit on return.
 * @param status a pointer to the UErrorCode
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
U_CAPI void U_EXPORT2
utrans_transUChars(const UTransliterator* trans,
                   UChar* text,
                   int32_t* textLength,
                   int32_t textCapacity,
                   int32_t start,
                   int32_t* limit,
                   UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);



/**
 * Transliterate the portion of the UChar* text buffer that can be
 * transliterated unambiguously.  See utrans_transIncremental().  The
 * string is passed in via a UChar* buffer.  The string is modified in
 * place.  If the result is longer than textCapacity, it is truncated.
 * The actual length of the result is returned in *textLength, if
 * textLength is non-NULL.  *textLength may be greater than
 * textCapacity, but only textCapacity UChars will be written to
 * *text, including the zero terminator.  See utrans_transIncremental()
 * for usage details.
 *
 * @param trans the transliterator
 * @param text a pointer to a buffer containing the text to be
 * transliterated on input and the result text on output.
 * @param textLength a pointer to the length of the string in text.
 * If the length is -1 then the string is assumed to be
 * zero-terminated.  Upon return, the new length is stored in
 * *textLength.  If textLength is NULL then the string is assumed to
 * be zero-terminated.
 * @param textCapacity the capacity of the text buffer, in UChars
 * @param pos a struct containing the start and limit indices of the
 * text to be read and the text to be transliterated
 * @param status a pointer to the UErrorCode
 * @see utrans_transIncremental
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
U_CAPI void U_EXPORT2
utrans_transIncrementalUChars(const UTransliterator* trans,
                              UChar* text,
                              int32_t* textLength,
                              int32_t textCapacity,
                              UTransPosition* pos,
                              UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);



/**
 * Create a rule string that can be passed to utrans_openU to recreate this
 * transliterator.
 *
 * @param trans     The transliterator
 * @param escapeUnprintable if true then convert unprintable characters to their
 *                  hex escape representations, \\uxxxx or \\Uxxxxxxxx.
 *                  Unprintable characters are those other than
 *                  U+000A, U+0020..U+007E.
 * @param result    A pointer to a buffer to receive the rules.
 * @param resultLength The maximum size of result.
 * @param status    A pointer to the UErrorCode.  In case of error status, the
 *                  contents of result are undefined.
 * @return int32_t  The length of the rule string (may be greater than resultLength,
 *                  in which case an error is returned).
 * \xrefitem stable "Stable" "Stable List" ICU 53
 */
U_CAPI int32_t U_EXPORT2
utrans_toRules(     const UTransliterator* trans,
                    UBool escapeUnprintable,
                    UChar* result, int32_t resultLength,
                    UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);



/* deprecated API ----------------------------------------------------------- */

#ifndef U_HIDE_DEPRECATED_API

/*
 * See the file-level documentation at the top of this header for why the
 * functions taking char * IDs are deprecated.  No deprecated declarations
 * are present in this section of this copy of the header.
 */

#endif  /* U_HIDE_DEPRECATED_API */

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif /* UTRANS_H */

/** @} */ // addtogroup